xref: /Universal-ctags/main/parse.c (revision b5cc9e0d37e2944d6d0965d2f6edcd66ec509fac)
1 /*
2 *   Copyright (c) 1996-2003, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains functions for managing input languages and
8 *   dispatching files to the appropriate language parser.
9 */
10 
11 /*
12 *   INCLUDE FILES
13 */
14 #include "general.h"  /* must always come first */
15 
16 /* TODO: This definition should be removed. */
17 #define OPTION_WRITE
18 #include "options_p.h"
19 
20 #include <string.h>
21 
22 #include "ctags.h"
23 #include "debug.h"
24 #include "entry_p.h"
25 #include "field_p.h"
26 #include "flags_p.h"
27 #include "htable.h"
28 #include "keyword.h"
29 #include "lxpath_p.h"
30 #include "param.h"
31 #include "param_p.h"
32 #include "parse_p.h"
33 #include "parsers_p.h"
34 #include "promise.h"
35 #include "promise_p.h"
36 #include "ptag_p.h"
37 #include "ptrarray.h"
38 #include "read.h"
39 #include "read_p.h"
40 #include "routines.h"
41 #include "routines_p.h"
42 #include "stats_p.h"
43 #include "subparser.h"
44 #include "subparser_p.h"
45 #include "trace.h"
46 #include "trashbox.h"
47 #include "trashbox_p.h"
48 #include "vstring.h"
49 #ifdef HAVE_ICONV
50 # include "mbcs_p.h"
51 #endif
52 #include "writer_p.h"
53 #include "xtag_p.h"
54 
55 /*
56  * DATA TYPES
57  */
/* How a parser candidate was matched against a command or file name. */
enum specType {
	SPEC_NONE,
	SPEC_NAME,
	SPEC_ALIAS = SPEC_NAME,	/* an alias shares the value (and label) of a name */
	SPEC_EXTENSION,
	SPEC_PATTERN,
};

/* Printable label for each specType value, indexed by the enumerator. */
const char *specTypeName [] = {
	[SPEC_NONE]      = "none",
	[SPEC_NAME]      = "name",
	[SPEC_EXTENSION] = "extension",
	[SPEC_PATTERN]   = "pattern",
};
68 
69 typedef struct {
70 	langType lang;
71 	const char* spec;
72 	enum specType specType;
73 }  parserCandidate;
74 
75 typedef struct sParserObject {
76 	parserDefinition *def;
77 
78 	kindDefinition* fileKind;
79 
80 	stringList* currentPatterns;   /* current list of file name patterns */
81 	stringList* currentExtensions; /* current list of extensions */
82 	stringList* currentAliases;    /* current list of aliases */
83 
84 	unsigned int initialized:1;    /* initialize() is called or not */
85 	unsigned int dontEmit:1;	   /* run but don't emit tags.
86 									  This parser was disabled but a subparser on
87 									  this parser makes this parser run (to drive
88 									  the subparser). */
89 	unsigned int pseudoTagPrinted:1;   /* pseudo tags about this parser
90 										  is emitted or not. */
91 	unsigned int used;			/* Used for printing language specific statistics. */
92 
93 	unsigned int anonymousIdentiferId; /* managed by anon* functions */
94 
95 	struct slaveControlBlock *slaveControlBlock;
96 	struct kindControlBlock  *kindControlBlock;
97 	struct lregexControlBlock *lregexControlBlock;
98 
99 	langType pretendingAsLanguage; /* OLDLANG in --_pretend-<NEWLANG>=<OLDLANG>
100 									  is set here if this parser is NEWLANG.
101 									  LANG_IGNORE is set if no pretending. */
102 	langType pretendedAsLanguage;  /* NEWLANG in --_pretend-<NEWLANG>=<OLDLANG>
103 									  is set here if this parser is OLDLANG.
104 									  LANG_IGNORE is set if no being pretended. */
105 
106 } parserObject;
107 
108 /*
109  * FUNCTION PROTOTYPES
110  */
111 
112 static void lazyInitialize (langType language);
113 static void addParserPseudoTags (langType language);
114 static void installKeywordTable (const langType language);
115 static void installTagRegexTable (const langType language);
116 static void installTagXpathTable (const langType language);
117 static void anonResetMaybe (parserObject *parser);
118 static void setupAnon (void);
119 static void teardownAnon (void);
120 static void uninstallTagXpathTable (const langType language);
121 
122 /*
123 *   DATA DEFINITIONS
124 */
125 static parserDefinition *FallbackParser (void);
126 static parserDefinition *CTagsParser (void);
127 static parserDefinition *CTagsSelfTestParser (void);
/* Table of parser-definition factory functions.
 *
 * When EXTERNAL_PARSER_LIST is defined it supplies the whole table.
 * Otherwise the three parsers declared just above come first, followed by
 * the *_PARSER_LIST macros.  The commas guarded by HAVE_LIBXML,
 * HAVE_LIBYAML, HAVE_PACKCC and HAVE_PCRE2 separate the optional lists;
 * presumably each optional list expands to nothing when its feature is
 * not available -- TODO confirm against the header defining these macros. */
128 static parserDefinitionFunc* BuiltInParsers[] = {
129 #ifdef EXTERNAL_PARSER_LIST
130 	EXTERNAL_PARSER_LIST
131 #else  /* ! EXTERNAL_PARSER_LIST */
132 	CTagsParser,				/* This must be first entry. */
133 	FallbackParser,				/* LANG_FALLBACK */
134 	CTagsSelfTestParser,
135 
136 	PARSER_LIST,
137 	XML_PARSER_LIST
138 #ifdef HAVE_LIBXML
139 	,
140 #endif
141 	YAML_PARSER_LIST
142 #ifdef HAVE_LIBYAML
143 	,
144 #endif
145        PEG_PARSER_LIST
146 #ifdef HAVE_PACKCC
147        ,
148 #endif
149 	   OPTLIB2C_PCRE2_PARSER_LIST
150 #ifdef HAVE_PCRE2
151       ,
152 #endif
153 #endif	/* EXTERNAL_PARSER_LIST */
154 };
155 static parserObject* LanguageTable = NULL;
156 static unsigned int LanguageCount = 0;
157 static hashTable* LanguageHTable = NULL;
158 static kindDefinition defaultFileKind = {
159 	.enabled     = false,
160 	.letter      = KIND_FILE_DEFAULT_LETTER,
161 	.name        = KIND_FILE_DEFAULT_NAME,
162 	.description = KIND_FILE_DEFAULT_NAME,
163 };
164 
165 /*
166 *   FUNCTION DEFINITIONS
167 */
168 
/* Tell whether C may appear in a language name: any printable, non-blank
 * character except a single quote, a double quote and a semicolon.
 *
 * Callers pass bytes taken from a plain `char', which may be negative.
 * The <ctype.h> functions are only defined for values representable as
 * unsigned char (and EOF), so the value is narrowed before isgraph(). */
static bool isLanguageNameChar(int c)
{
	if (! isgraph ((unsigned char) c))
		return false;

	return (c != '\'' && c != '"' && c != ';');
}
180 
countParsers(void)181 extern unsigned int countParsers (void)
182 {
183 	return LanguageCount;
184 }
185 
makeSimpleTag(const vString * const name,const int kindIndex)186 extern int makeSimpleTag (
187 		const vString* const name, const int kindIndex)
188 {
189 	return makeSimpleRefTag (name, kindIndex, ROLE_DEFINITION_INDEX);
190 }
191 
makeSimpleRefTag(const vString * const name,const int kindIndex,int roleIndex)192 extern int makeSimpleRefTag (const vString* const name, const int kindIndex,
193 			     int roleIndex)
194 {
195 	int r = CORK_NIL;
196 
197 	Assert (roleIndex < (int)countInputLanguageRoles(kindIndex));
198 
199 	/* do not check for kind being disabled - that happens later in makeTagEntry() */
200 	if (name != NULL  &&  vStringLength (name) > 0)
201 	{
202 	    tagEntryInfo e;
203 	    initRefTagEntry (&e, vStringValue (name), kindIndex, roleIndex);
204 
205 	    r = makeTagEntry (&e);
206 	}
207 	return r;
208 }
209 
/* Make a placeholder entry named after the C string held in NAME. */
extern int makeSimplePlaceholder(const vString* const name)
{
	const char* const text = vStringValue (name);

	return makePlaceholder (text);
}
214 
isLanguageEnabled(const langType language)215 extern bool isLanguageEnabled (const langType language)
216 {
217 	const parserDefinition* const def = LanguageTable [language].def;
218 	return def->enabled;
219 }
220 
isLanguageVisible(const langType language)221 extern bool isLanguageVisible (const langType language)
222 {
223 	const parserDefinition* const lang = LanguageTable [language].def;
224 
225 	return !lang->invisible;
226 }
227 
228 /*
229 *   parserDescription mapping management
230 */
231 
parserNew(const char * name)232 extern parserDefinition* parserNew (const char* name)
233 {
234 	parserDefinition* result = xCalloc (1, parserDefinition);
235 	result->name = eStrdup (name);
236 
237 	result->enabled = true;
238 	return result;
239 }
240 
doesLanguageAllowNullTag(const langType language)241 extern bool doesLanguageAllowNullTag (const langType language)
242 {
243 	Assert (0 <= language  &&  language < (int) LanguageCount);
244 	return LanguageTable [language].def->allowNullTag;
245 }
246 
doesLanguageRequestAutomaticFQTag(const langType language)247 extern bool doesLanguageRequestAutomaticFQTag (const langType language)
248 {
249 	Assert (0 <= language  &&  language < (int) LanguageCount);
250 	return LanguageTable [language].def->requestAutomaticFQTag;
251 }
252 
getLanguageNameFull(const langType language,bool noPretending)253 static const char *getLanguageNameFull (const langType language, bool noPretending)
254 {
255 	const char* result;
256 
257 	if (language == LANG_IGNORE)
258 		result = "unknown";
259 	else
260 	{
261 		Assert (0 <= language  &&  language < (int) LanguageCount);
262 		if (noPretending)
263 			result = LanguageTable [language].def->name;
264 		else
265 		{
266 			langType real_language = LanguageTable [language].pretendingAsLanguage;
267 			if (real_language == LANG_IGNORE)
268 				result = LanguageTable [language].def->name;
269 			else
270 			{
271 				Assert (0 <= real_language  &&  real_language < (int) LanguageCount);
272 				result = LanguageTable [real_language].def->name;
273 			}
274 		}
275 	}
276 	return result;
277 }
278 
/* Return the name of LANGUAGE with pretending taken into account. */
extern const char *getLanguageName (const langType language)
{
	const bool noPretending = false;

	return getLanguageNameFull (language, noPretending);
}
283 
getLanguageKindName(const langType language,const int kindIndex)284 extern const char *getLanguageKindName (const langType language, const int kindIndex)
285 {
286 	kindDefinition* kdef = getLanguageKind (language, kindIndex);
287 	return kdef->name;
288 }
289 
290 static kindDefinition kindGhost = {
291 	.letter = KIND_GHOST_LETTER,
292 	.name = KIND_GHOST_NAME,
293 	.description = KIND_GHOST_NAME,
294 };
295 
/* Register DEF in the kind control block of LANGUAGE and return the value
 * of defineKind(). */
extern int defineLanguageKind (const langType language, kindDefinition *def,
							   freeKindDefFunc freeKindDef)
{
	struct kindControlBlock* const kcb = LanguageTable [language].kindControlBlock;

	return defineKind (kcb, def, freeKindDef);
}
301 
countLanguageKinds(const langType language)302 extern unsigned int countLanguageKinds (const langType language)
303 {
304 	return countKinds (LanguageTable [language].kindControlBlock);
305 }
306 
countLanguageRoles(const langType language,int kindIndex)307 extern unsigned int countLanguageRoles (const langType language, int kindIndex)
308 {
309 	return countRoles (LanguageTable [language].kindControlBlock, kindIndex);
310 }
311 
getLanguageKind(const langType language,int kindIndex)312 extern kindDefinition* getLanguageKind (const langType language, int kindIndex)
313 {
314 	kindDefinition* kdef;
315 
316 	Assert (0 <= language  &&  language < (int) LanguageCount);
317 
318 	switch (kindIndex)
319 	{
320 	case KIND_FILE_INDEX:
321 		kdef = LanguageTable [language].fileKind;
322 		break;
323 	case KIND_GHOST_INDEX:
324 		kdef = &kindGhost;
325 		break;
326 	default:
327 		Assert (kindIndex >= 0);
328 		kdef = getKind (LanguageTable [language].kindControlBlock, kindIndex);
329 	}
330 	return kdef;
331 }
332 
getLanguageKindForLetter(const langType language,char kindLetter)333 extern kindDefinition* getLanguageKindForLetter (const langType language, char kindLetter)
334 {
335 	Assert (0 <= language  &&  language < (int) LanguageCount);
336 	if (kindLetter == LanguageTable [language].fileKind->letter)
337 		return LanguageTable [language].fileKind;
338 	else if (kindLetter == KIND_GHOST_LETTER)
339 		return &kindGhost;
340 	else
341 		return getKindForLetter (LanguageTable [language].kindControlBlock, kindLetter);
342 }
343 
getLanguageKindForName(const langType language,const char * kindName)344 extern kindDefinition* getLanguageKindForName (const langType language, const char *kindName)
345 {
346 	Assert (0 <= language  &&  language < (int) LanguageCount);
347 	Assert (kindName);
348 
349 	if (strcmp(kindName, LanguageTable [language].fileKind->name) == 0)
350 		return LanguageTable [language].fileKind;
351 	else if (strcmp(kindName, KIND_GHOST_NAME) == 0)
352 		return &kindGhost;
353 	else
354 		return getKindForName (LanguageTable [language].kindControlBlock, kindName);
355 }
356 
getLanguageRole(const langType language,int kindIndex,int roleIndex)357 extern roleDefinition* getLanguageRole(const langType language, int kindIndex, int roleIndex)
358 {
359 	return getRole (LanguageTable [language].kindControlBlock, kindIndex, roleIndex);
360 }
361 
getLanguageRoleForName(const langType language,int kindIndex,const char * roleName)362 extern roleDefinition* getLanguageRoleForName (const langType language, int kindIndex,
363 											   const char *roleName)
364 {
365 	return getRoleForName (LanguageTable [language].kindControlBlock, kindIndex, roleName);
366 }
367 
getNamedLanguageFull(const char * const name,size_t len,bool noPretending,bool include_aliases)368 extern langType getNamedLanguageFull (const char *const name, size_t len, bool noPretending,
369 									  bool include_aliases)
370 {
371 	langType result = LANG_IGNORE;
372 	unsigned int i;
373 	Assert (name != NULL);
374 
375 	if (len == 0)
376 	{
377 		parserDefinition *def = (parserDefinition *)hashTableGetItem (LanguageHTable, name);
378 		if (def)
379 			result = def->id;
380 	}
381 	else
382 		for (i = 0  ;  i < LanguageCount  &&  result == LANG_IGNORE  ;  ++i)
383 		{
384 			const parserDefinition* const lang = LanguageTable [i].def;
385 			Assert (lang->name);
386 			vString* vstr = vStringNewInit (name);
387 			vStringTruncate (vstr, len);
388 
389 			if (strcasecmp (vStringValue (vstr), lang->name) == 0)
390 				result = i;
391 			else if (include_aliases)
392 			{
393 				stringList* const aliases = LanguageTable [i].currentAliases;
394 				if (aliases && stringListCaseMatched (aliases, vStringValue (vstr)))
395 					result = i;
396 			}
397 			vStringDelete (vstr);
398 		}
399 
400 	if (result != LANG_IGNORE
401 		&& (!noPretending)
402 		&& LanguageTable [result].pretendedAsLanguage != LANG_IGNORE)
403 		result = LanguageTable [result].pretendedAsLanguage;
404 
405 	return result;
406 }
407 
getNamedLanguage(const char * const name,size_t len)408 extern langType getNamedLanguage (const char *const name, size_t len)
409 {
410 	return getNamedLanguageFull (name, len, false, false);
411 }
412 
getNamedLanguageOrAlias(const char * const name,size_t len)413 extern langType getNamedLanguageOrAlias (const char *const name, size_t len)
414 {
415 	return getNamedLanguageFull (name, len, false, true);
416 }
417 
/* Find the first enabled language, at or after START_INDEX, whose name
 * equals KEY (ignoring case) or one of whose current aliases matches KEY.
 *
 * START_INDEX may be LANG_AUTO to search from the first language.  On a
 * match, *SPEC receives the name or alias that matched and *SPECTYPE is
 * set to SPEC_NAME or SPEC_ALIAS; both are left untouched otherwise.
 *
 * Returns the language found, or LANG_IGNORE. */
static langType getNameOrAliasesLanguageAndSpec (const char *const key, langType start_index,
						 const char **const spec, enum specType *specType)
{
	unsigned int i;

	if (start_index == LANG_AUTO)
		start_index = 0;
	else if (start_index == LANG_IGNORE || start_index >= (int) LanguageCount)
		return LANG_IGNORE;

	for (i = start_index  ;  i < LanguageCount  ;  ++i)
	{
		const parserObject* parser;
		vString* alias;

		if (! isLanguageEnabled (i))
			continue;

		parser = LanguageTable + i;

		if (parser->def->name != NULL && strcasecmp (key, parser->def->name) == 0)
		{
			*spec = parser->def->name;
			*specType = SPEC_NAME;
			return i;
		}

		if (parser->currentAliases != NULL
			&& (alias = stringListFileFinds (parser->currentAliases, key)) != NULL)
		{
			*spec = vStringValue (alias);
			*specType = SPEC_ALIAS;
			return i;
		}
	}

	return LANG_IGNORE;
}
454 
/* Return the first enabled language, at or after STARTFROM, whose name or
 * alias matches the base name of COMMAND; LANG_IGNORE if there is none.
 *
 * The matched spec and its type are not needed here.  They are received
 * in correctly typed, initialized locals so that no pointer cast is
 * required and nothing is left indeterminate when no language matches. */
extern langType getLanguageForCommand (const char *const command, langType startFrom)
{
	const char *const tmp_command = baseFilename (command);
	const char *tmp_spec = NULL;
	enum specType tmp_specType = SPEC_NONE;

	return getNameOrAliasesLanguageAndSpec (tmp_command, startFrom,
											&tmp_spec, &tmp_specType);
}
465 
/* Find the first enabled language, at or after START_INDEX, that claims
 * BASENAME.
 *
 * The file name patterns of every candidate are tried first; only when no
 * pattern matches are the extensions tried.  START_INDEX may be LANG_AUTO
 * to search from the first language.
 *
 * On a match, *SPEC receives the pattern or extension that matched and
 * *SPECTYPE is set to SPEC_PATTERN or SPEC_EXTENSION.  When the search
 * runs but nothing matches, *SPEC is NULL.  When START_INDEX is
 * LANG_IGNORE or out of range, neither output is touched.
 *
 * Returns the language found, or LANG_IGNORE. */
static langType getPatternLanguageAndSpec (const char *const baseName, langType start_index,
					   const char **const spec, enum specType *specType)
{
	unsigned int i;
	const char *extension;

	if (start_index == LANG_AUTO)
		start_index = 0;
	else if (start_index == LANG_IGNORE || start_index >= (int) LanguageCount)
		return LANG_IGNORE;

	*spec = NULL;

	/* Pass 1: file name patterns. */
	for (i = start_index  ;  i < LanguageCount  ;  ++i)
	{
		stringList* ptrns;
		vString* hit;

		if (! isLanguageEnabled (i))
			continue;

		ptrns = LanguageTable [i].currentPatterns;
		if (ptrns != NULL && (hit = stringListFileFinds (ptrns, baseName)) != NULL)
		{
			*spec = vStringValue (hit);
			*specType = SPEC_PATTERN;
			return i;
		}
	}

	/* Pass 2: extensions.  The extension is the same for every language,
	 * so it is computed once.
	 * NOTE(review): this assumes fileExtension() has no side effects and
	 * depends only on its argument -- confirm. */
	extension = fileExtension (baseName);
	for (i = start_index  ;  i < LanguageCount  ;  ++i)
	{
		stringList* exts;
		vString* hit;

		if (! isLanguageEnabled (i))
			continue;

		exts = LanguageTable [i].currentExtensions;
		if (exts != NULL && (hit = stringListExtensionFinds (exts, extension)) != NULL)
		{
			*spec = vStringValue (hit);
			*specType = SPEC_EXTENSION;
			return i;
		}
	}

	return LANG_IGNORE;
}
517 
/* Return the first enabled language, at or after STARTFROM, whose patterns
 * or extensions match the base name of FILENAME; LANG_IGNORE if none.
 *
 * The matched spec and its type are not needed here.  They are received
 * in correctly typed, initialized locals so that no pointer cast is
 * required and nothing is left indeterminate when no language matches. */
extern langType getLanguageForFilename (const char *const filename, langType startFrom)
{
	const char *const tmp_filename = baseFilename (filename);
	const char *tmp_spec = NULL;
	enum specType tmp_specType = SPEC_NONE;

	return getPatternLanguageAndSpec (tmp_filename, startFrom,
									  &tmp_spec, &tmp_specType);
}
528 
/* Return the scope separator string of LANGUAGE to put between a tag of
 * kind PARENTKINDINDEX and a tag of kind KINDINDEX, or NULL when
 * getScopeSeparator() finds none. */
const char *scopeSeparatorFor (langType language, int kindIndex, int parentKindIndex)
{
	const scopeSeparator *found;

	Assert (0 <= language  &&  language < (int) LanguageCount);

	found = getScopeSeparator (LanguageTable [language].kindControlBlock,
							   kindIndex, parentKindIndex);
	if (found == NULL)
		return NULL;

	return found->separator;
}
539 
processLangDefineScopesep(const langType language,const char * const option,const char * const parameter)540 static bool processLangDefineScopesep(const langType language,
541 								  const char *const option,
542 								  const char *const parameter)
543 {
544 	parserObject *parser;
545 	const char * p = parameter;
546 
547 
548 	char parentKletter;
549 	int parentKindex = KIND_FILE_INDEX;
550 	char kletter;
551 	int kindex = KIND_FILE_INDEX;
552 	const char *separator;
553 
554 	Assert (0 <= language  &&  language < (int) LanguageCount);
555 	parser = LanguageTable + language;
556 
557 
558 	/*
559 	 * Parent
560 	 */
561 	parentKletter = p[0];
562 
563 	if (parentKletter == '\0')
564 		error (FATAL, "no scope separator specified in \"--%s\" option", option);
565 	else if (parentKletter == '/')
566 		parentKindex = KIND_GHOST_INDEX;
567 	else if (parentKletter == KIND_WILDCARD_LETTER)
568 		parentKindex = KIND_WILDCARD_INDEX;
569 	else if (parentKletter == KIND_FILE_DEFAULT_LETTER)
570 		error (FATAL,
571 			   "the kind letter `%c' in \"--%s\" option is reserved for \"%s\" kind and no separator can be assigned to",
572 			   KIND_FILE_DEFAULT_LETTER, option, KIND_FILE_DEFAULT_NAME);
573 	else if (isalpha (parentKletter))
574 	{
575 		kindDefinition *kdef = getKindForLetter (parser->kindControlBlock, parentKletter);
576 		if (kdef == NULL)
577 			error (FATAL,
578 				   "the kind for letter `%c' specified in \"--%s\" option is not defined.",
579 				   parentKletter, option);
580 		parentKindex = kdef->id;
581 	}
582 	else
583 		error (FATAL,
584 			   "the kind letter `%c` given in \"--%s\" option is not an alphabet",
585 			   parentKletter, option);
586 
587 
588 	/*
589 	 * Child
590 	 */
591 	if (parentKindex == KIND_GHOST_INDEX)
592 		kletter = p[1];
593 	else
594 	{
595 		if (p[1] != '/')
596 			error (FATAL,
597 				   "wrong separator specification in \"--%s\" option: no slash after parent kind letter `%c'",
598 				   option, parentKletter);
599 		kletter = p[2];
600 	}
601 
602 	if (kletter == '\0')
603 		error (FATAL, "no child kind letter in \"--%s\" option", option);
604 	else if (kletter == '/')
605 		error (FATAL,
606 			   "wrong separator specification in \"--%s\" option: don't specify slash char twice: %s",
607 			   option, parameter);
608 	else if (kletter == ':')
609 		error (FATAL,
610 			   "no child kind letter in \"--%s\" option", option);
611 	else if (kletter == KIND_WILDCARD_LETTER)
612 	{
613 		if (parentKindex != KIND_WILDCARD_INDEX
614 			&& parentKindex != KIND_GHOST_INDEX)
615 			error (FATAL,
616 				   "cannot use wild card for child kind unless parent kind is also wild card or empty");
617 		kindex = KIND_WILDCARD_INDEX;
618 	}
619 	else if (kletter == KIND_FILE_DEFAULT_LETTER)
620 		error (FATAL,
621 			   "the kind letter `%c' in \"--%s\" option is reserved for \"%s\" kind and no separator can be assigned to",
622 			   KIND_FILE_DEFAULT_LETTER, option, KIND_FILE_DEFAULT_NAME);
623 	else if (isalpha (kletter))
624 	{
625 		kindDefinition *kdef = getKindForLetter (parser->kindControlBlock, kletter);
626 		if (kdef == NULL)
627 			error (FATAL,
628 				   "the kind for letter `%c' specified in \"--%s\" option is not defined.",
629 				   kletter, option);
630 		kindex = kdef->id;
631 	}
632 	else
633 		error (FATAL,
634 			   "the kind letter `%c` given in \"--%s\" option is not an alphabet",
635 			   kletter, option);
636 
637 	/*
638 	 * Separator
639 	 */
640 	if (parentKindex == KIND_GHOST_INDEX)
641 	{
642 		if (p[2] != ':')
643 			error (FATAL,
644 				   "wrong separator specification in \"--%s\" option: cannot find a colon after child kind: %s",
645 				   option, parameter);
646 		separator = p + 3;
647 	}
648 	else
649 	{
650 		if (p[3] != ':')
651 			error (FATAL,
652 				   "wrong separator specification in \"--%s\" option: cannot find a colon after child kind: %s",
653 				   option, parameter);
654 		separator = p + 4;
655 	}
656 
657 	Assert (parentKindex != KIND_FILE_INDEX);
658 	Assert (kindex != KIND_FILE_INDEX);
659 	defineScopeSeparator (parser->kindControlBlock, kindex, parentKindex, separator);
660 	return true;
661 }
662 
processScopesepOption(const char * const option,const char * const parameter)663 extern bool processScopesepOption (const char *const option, const char * const parameter)
664 {
665 	langType language;
666 
667 	language = getLanguageComponentInOption (option, "_scopesep-");
668 	if (language == LANG_IGNORE)
669 		return false;
670 
671 	return processLangDefineScopesep (language, option, parameter);
672 }
673 
parserCandidateNew(unsigned int count CTAGS_ATTR_UNUSED)674 static parserCandidate* parserCandidateNew(unsigned int count CTAGS_ATTR_UNUSED)
675 {
676 	parserCandidate* candidates;
677 	unsigned int i;
678 
679 	candidates= xMalloc(LanguageCount, parserCandidate);
680 	for (i = 0; i < LanguageCount; i++)
681 	{
682 		candidates[i].lang = LANG_IGNORE;
683 		candidates[i].spec = NULL;
684 		candidates[i].specType = SPEC_NONE;
685 	}
686 	return candidates;
687 }
688 
689 /* If multiple parsers are found, return LANG_AUTO */
nominateLanguageCandidates(const char * const key,parserCandidate ** candidates)690 static unsigned int nominateLanguageCandidates (const char *const key, parserCandidate** candidates)
691 {
692 	unsigned int count;
693 	langType i;
694 	const char* spec = NULL;
695 	enum specType specType = SPEC_NONE;
696 
697 	*candidates = parserCandidateNew(LanguageCount);
698 
699 	for (count = 0, i = LANG_AUTO; i != LANG_IGNORE; )
700 	{
701 		i = getNameOrAliasesLanguageAndSpec (key, i, &spec, &specType);
702 		if (i != LANG_IGNORE)
703 		{
704 			(*candidates)[count].lang = i++;
705 			(*candidates)[count].spec = spec;
706 			(*candidates)[count++].specType = specType;
707 		}
708 	}
709 
710 	return count;
711 }
712 
713 static unsigned int
nominateLanguageCandidatesForPattern(const char * const baseName,parserCandidate ** candidates)714 nominateLanguageCandidatesForPattern(const char *const baseName, parserCandidate** candidates)
715 {
716 	unsigned int count;
717 	langType i;
718 	const char* spec;
719 	enum specType specType = SPEC_NONE;
720 
721 	*candidates = parserCandidateNew(LanguageCount);
722 
723 	for (count = 0, i = LANG_AUTO; i != LANG_IGNORE; )
724 	{
725 		i = getPatternLanguageAndSpec (baseName, i, &spec, &specType);
726 		if (i != LANG_IGNORE)
727 		{
728 			(*candidates)[count].lang = i++;
729 			(*candidates)[count].spec = spec;
730 			(*candidates)[count++].specType = specType;
731 		}
732 	}
733 	return count;
734 }
735 
736 static vString* extractEmacsModeAtFirstLine(MIO* input);
737 
738 /*  The name of the language interpreter, either directly or as the argument
739  *  to "env".
740  */
determineInterpreter(const char * const cmd)741 static vString* determineInterpreter (const char* const cmd)
742 {
743 	vString* const interpreter = vStringNew ();
744 	const char* p = cmd;
745 	do
746 	{
747 		vStringClear (interpreter);
748 		for ( ;  isspace ((int) *p)  ;  ++p)
749 			;  /* no-op */
750 		for ( ;  *p != '\0'  &&  ! isspace ((int) *p)  ;  ++p)
751 			vStringPut (interpreter, (int) *p);
752 	} while (strcmp (vStringValue (interpreter), "env") == 0);
753 	return interpreter;
754 }
755 
extractInterpreter(MIO * input)756 static vString* extractInterpreter (MIO* input)
757 {
758 	vString* const vLine = vStringNew ();
759 	const char* const line = readLineRaw (vLine, input);
760 	vString* interpreter = NULL;
761 
762 	if (line != NULL  &&  line [0] == '#'  &&  line [1] == '!')
763 	{
764 		/* "48.2.4.1 Specifying File Variables" of Emacs info:
765 		   ---------------------------------------------------
766 		   In shell scripts, the first line is used to
767 		   identify the script interpreter, so you
768 		   cannot put any local variables there.  To
769 		   accommodate this, Emacs looks for local
770 		   variable specifications in the _second_
771 		   line if the first line specifies an
772 		   interpreter.  */
773 
774 		interpreter = extractEmacsModeAtFirstLine(input);
775 		if (!interpreter)
776 		{
777 			const char* const lastSlash = strrchr (line, '/');
778 			const char *const cmd = lastSlash != NULL ? lastSlash+1 : line+2;
779 			interpreter = determineInterpreter (cmd);
780 		}
781 	}
782 	vStringDelete (vLine);
783 	return interpreter;
784 }
785 
determineEmacsModeAtFirstLine(const char * const line)786 static vString* determineEmacsModeAtFirstLine (const char* const line)
787 {
788 	vString* mode = vStringNew ();
789 
790 	const char* p = strstr(line, "-*-");
791 	if (p == NULL)
792 		goto out;
793 	p += strlen("-*-");
794 
795 	for ( ;  isspace ((int) *p)  ;  ++p)
796 		;  /* no-op */
797 
798 	if (strncasecmp(p, "mode:", strlen("mode:")) == 0)
799 	{
800 		/* -*- mode: MODE; -*- */
801 		p += strlen("mode:");
802 		for ( ;  isspace ((int) *p)  ;  ++p)
803 			;  /* no-op */
804 		for ( ;  *p != '\0'  &&  isLanguageNameChar ((int) *p)  ;  ++p)
805 			vStringPut (mode, (int) *p);
806 	}
807 	else
808 	{
809 		/* -*- MODE -*- */
810 		const char* end = strstr (p, "-*-");
811 
812 		if (end == NULL)
813 			goto out;
814 
815 		for ( ;  p < end &&  isLanguageNameChar ((int) *p)  ;  ++p)
816 			vStringPut (mode, (int) *p);
817 
818 		for ( ;  isspace ((int) *p)  ;  ++p)
819 			;  /* no-op */
820 		if (strncmp(p, "-*-", strlen("-*-")) != 0)
821 			vStringClear (mode);
822 	}
823 
824 	vStringLower (mode);
825 
826 out:
827 	return mode;
828 
829 }
830 
extractEmacsModeAtFirstLine(MIO * input)831 static vString* extractEmacsModeAtFirstLine(MIO* input)
832 {
833 	vString* const vLine = vStringNew ();
834 	const char* const line = readLineRaw (vLine, input);
835 	vString* mode = NULL;
836 	if (line != NULL)
837 		mode = determineEmacsModeAtFirstLine (line);
838 	vStringDelete (vLine);
839 
840 	if (mode && (vStringLength(mode) == 0))
841 	{
842 		vStringDelete(mode);
843 		mode = NULL;
844 	}
845 	return mode;
846 }
847 
determineEmacsModeAtEOF(MIO * const fp)848 static vString* determineEmacsModeAtEOF (MIO* const fp)
849 {
850 	vString* const vLine = vStringNew ();
851 	const char* line;
852 	bool headerFound = false;
853 	const char* p;
854 	vString* mode = vStringNew ();
855 
856 	while ((line = readLineRaw (vLine, fp)) != NULL)
857 	{
858 		if (headerFound && ((p = strstr (line, "mode:")) != NULL))
859 		{
860 			vStringClear (mode);
861 			headerFound = false;
862 
863 			p += strlen ("mode:");
864 			for ( ;  isspace ((int) *p)  ;  ++p)
865 				;  /* no-op */
866 			for ( ;  *p != '\0'  &&  isLanguageNameChar ((int) *p)  ;  ++p)
867 				vStringPut (mode, (int) *p);
868 		}
869 		else if (headerFound && (p = strstr(line, "End:")))
870 			headerFound = false;
871 		else if (strstr (line, "Local Variables:"))
872 			headerFound = true;
873 	}
874 	vStringDelete (vLine);
875 	return mode;
876 }
877 
extractEmacsModeLanguageAtEOF(MIO * input)878 static vString* extractEmacsModeLanguageAtEOF (MIO* input)
879 {
880 	vString* mode;
881 
882 	/* "48.2.4.1 Specifying File Variables" of Emacs info:
883 	   ---------------------------------------------------
884 	   you can define file local variables using a "local
885 	   variables list" near the end of the file.  The start of the
886 	   local variables list should be no more than 3000 characters
887 	   from the end of the file, */
888 	mio_seek(input, -3000, SEEK_END);
889 
890 	mode = determineEmacsModeAtEOF (input);
891 	if (mode && (vStringLength (mode) == 0))
892 	{
893 		vStringDelete (mode);
894 		mode = NULL;
895 	}
896 
897 	return mode;
898 }
899 
determineVimFileType(const char * const modeline)900 static vString* determineVimFileType (const char *const modeline)
901 {
902 	/* considerable combinations:
903 	   --------------------------
904 	   ... filetype=
905 	   ... ft= */
906 
907 	unsigned int i;
908 	const char* p;
909 
910 	const char* const filetype_prefix[] = {"filetype=", "ft="};
911 	vString* const filetype = vStringNew ();
912 
913 	for (i = 0; i < ARRAY_SIZE(filetype_prefix); i++)
914 	{
915 		if ((p = strrstr(modeline, filetype_prefix[i])) == NULL)
916 			continue;
917 
918 		p += strlen(filetype_prefix[i]);
919 		for ( ;  *p != '\0'  &&  isalnum ((int) *p)  ;  ++p)
920 			vStringPut (filetype, (int) *p);
921 		break;
922 	}
923 	return filetype;
924 }
925 
extractVimFileType(MIO * input)926 static vString* extractVimFileType(MIO* input)
927 {
928 	/* http://vimdoc.sourceforge.net/htmldoc/options.html#modeline
929 
930 	   [text]{white}{vi:|vim:|ex:}[white]se[t] {options}:[text]
931 	   options=> filetype=TYPE or ft=TYPE
932 
933 	   'modelines' 'mls'	number	(default 5)
934 			global
935 			{not in Vi}
936 	    If 'modeline' is on 'modelines' gives the number of lines that is
937 	    checked for set commands. */
938 
939 	vString* filetype = NULL;
940 #define RING_SIZE 5
941 	vString* ring[RING_SIZE];
942 	int i, j;
943 	unsigned int k;
944 	const char* const prefix[] = {
945 		"vim:", "vi:", "ex:"
946 	};
947 
948 	for (i = 0; i < RING_SIZE; i++)
949 		ring[i] = vStringNew ();
950 
951 	i = 0;
952 	while ((readLineRaw (ring[i++], input)) != NULL)
953 		if (i == RING_SIZE)
954 			i = 0;
955 
956 	j = i;
957 	do
958 	{
959 		const char* p;
960 
961 		j--;
962 		if (j < 0)
963 			j = RING_SIZE - 1;
964 
965 		for (k = 0; k < ARRAY_SIZE(prefix); k++)
966 			if ((p = strstr (vStringValue (ring[j]), prefix[k])) != NULL)
967 			{
968 				p += strlen(prefix[k]);
969 				for ( ;  isspace ((int) *p)  ;  ++p)
970 					;  /* no-op */
971 				filetype = determineVimFileType(p);
972 				break;
973 			}
974 	} while (((i == RING_SIZE)? (j != RING_SIZE - 1): (j != i)) && (!filetype));
975 
976 	for (i = RING_SIZE - 1; i >= 0; i--)
977 		vStringDelete (ring[i]);
978 #undef RING_SIZE
979 
980 	if (filetype && (vStringLength (filetype) == 0))
981 	{
982 		vStringDelete (filetype);
983 		filetype = NULL;
984 	}
985 	return filetype;
986 
987 	/* TODO:
988 	   [text]{white}{vi:|vim:|ex:}[white]{options} */
989 }
990 
/* Read one line from INPUT and hand it, together with DATA, to
 * DETERMINER.  Returns what DETERMINER returns, or NULL when no line
 * could be read. */
static vString* extractMarkGeneric (MIO* input,
									vString * (* determiner)(const char *const, void *),
									void *data)
{
	vString* const buffer = vStringNew ();
	const char* const text = readLineRaw (buffer, input);
	vString* const mark = (text != NULL)? determiner (text, data): NULL;

	vStringDelete (buffer);
	return mark;
}
1005 
determineZshAutoloadTag(const char * const modeline,void * data CTAGS_ATTR_UNUSED)1006 static vString* determineZshAutoloadTag (const char *const modeline,
1007 										 void *data CTAGS_ATTR_UNUSED)
1008 {
1009 	/* See "Autoloaded files" in zsh info.
1010 	   -------------------------------------
1011 	   #compdef ...
1012 	   #autoload [ OPTIONS ] */
1013 
1014 	if (((strncmp (modeline, "#compdef", 8) == 0) && isspace (*(modeline + 8)))
1015 	    || ((strncmp (modeline, "#autoload", 9) == 0)
1016 		&& (isspace (*(modeline + 9)) || *(modeline + 9) == '\0')))
1017 		return vStringNewInit ("zsh");
1018 	else
1019 		return NULL;
1020 }
1021 
extractZshAutoloadTag(MIO * input)1022 static vString* extractZshAutoloadTag(MIO* input)
1023 {
1024 	return extractMarkGeneric (input, determineZshAutoloadTag, NULL);
1025 }
1026 
determinePHPMark(const char * const modeline,void * data CTAGS_ATTR_UNUSED)1027 static vString* determinePHPMark(const char *const modeline,
1028 		void *data CTAGS_ATTR_UNUSED)
1029 {
1030 	if (strncmp (modeline, "<?php", 5) == 0)
1031 		return vStringNewInit ("php");
1032 	else
1033 		return NULL;
1034 }
1035 
extractPHPMark(MIO * input)1036 static vString* extractPHPMark(MIO* input)
1037 {
1038 	return extractMarkGeneric (input, determinePHPMark, NULL);
1039 }
1040 
1041 
1042 struct getLangCtx {
1043     const char *fileName;
1044     MIO        *input;
1045     bool     err;
1046 };
1047 
/*
 * Open (_glc_)->fileName in binary read mode unless (_glc_)->input is
 * already set.  On failure, record the error in (_glc_)->err and jump to
 * _label_.
 *
 * The definition deliberately ends without a trailing line continuation so
 * that whatever follows the macro is never spliced into it.
 */
#define GLC_FOPEN_IF_NECESSARY0(_glc_, _label_) do {			\
	if (!(_glc_)->input) {						\
		(_glc_)->input = getMio((_glc_)->fileName, "rb", false);	\
		if (!(_glc_)->input) {					\
			(_glc_)->err = true;				\
			goto _label_;					\
		}							\
	}								\
} while (0)
1057 
/*
 * Like GLC_FOPEN_IF_NECESSARY0, and additionally, when
 * _doesParserRequireMemoryStream_ is true and mio_memory_get_data() reports
 * no data for the current input, replace the input with the MIO returned by
 * mio_new_mio() for it.  The previous MIO is unreferenced.  If that step
 * fails, record the error in (_glc_)->err and jump to _label_.
 */
#define GLC_FOPEN_IF_NECESSARY(_glc_, _label_, _doesParserRequireMemoryStream_) \
	do {								\
		/* No-op when the input is already open. */		\
		GLC_FOPEN_IF_NECESSARY0 (_glc_, _label_);		\
		if ((_doesParserRequireMemoryStream_)			\
		    && (mio_memory_get_data((_glc_)->input, NULL) == NULL)) \
		{							\
			MIO *glc_prev_input_ = (_glc_)->input;		\
			(_glc_)->input = mio_new_mio (glc_prev_input_, 0, -1); \
			mio_unref (glc_prev_input_);			\
			if ((_glc_)->input == NULL) {			\
				(_glc_)->err = true;			\
				goto _label_;				\
			}						\
		}							\
	} while (0)
1074 
/* Drop the reference held in (_glc_)->input, if any, and reset it to NULL. */
#define GLC_FCLOSE(_glc_) do {			\
	if ((_glc_)->input != NULL) {		\
		mio_unref((_glc_)->input);	\
		(_glc_)->input = NULL;		\
	}					\
} while (0)
1081 
1082 static const struct taster {
1083 	vString* (* taste) (MIO *);
1084         const char     *msg;
1085 } eager_tasters[] = {
1086         {
1087 		.taste  = extractInterpreter,
1088 		.msg    = "interpreter",
1089         },
1090 	{
1091 		.taste  = extractZshAutoloadTag,
1092 		.msg    = "zsh autoload tag",
1093 	},
1094         {
1095 		.taste  = extractEmacsModeAtFirstLine,
1096 		.msg    = "emacs mode at the first line",
1097         },
1098         {
1099 		.taste  = extractEmacsModeLanguageAtEOF,
1100 		.msg    = "emacs mode at the EOF",
1101         },
1102         {
1103 		.taste  = extractVimFileType,
1104 		.msg    = "vim modeline",
1105         },
1106 		{
1107 		.taste  = extractPHPMark,
1108 		.msg    = "PHP marker",
1109 		}
1110 };
1111 static langType tasteLanguage (struct getLangCtx *glc, const struct taster *const tasters, int n_tasters,
1112 			      langType *fallback);
1113 
1114 /* If all the candidates have the same specialized language selector, return
1115  * it.  Otherwise, return NULL.
1116  */
1117 static bool
hasTheSameSelector(langType lang,selectLanguage candidate_selector)1118 hasTheSameSelector (langType lang, selectLanguage candidate_selector)
1119 {
1120 	selectLanguage *selector;
1121 
1122 	selector = LanguageTable[ lang ].def->selectLanguage;
1123 	if (selector == NULL)
1124 		return false;
1125 
1126 	while (*selector)
1127 	{
1128 		if (*selector == candidate_selector)
1129 			return true;
1130 		selector++;
1131 	}
1132 	return false;
1133 }
1134 
1135 static selectLanguage
commonSelector(const parserCandidate * candidates,int n_candidates)1136 commonSelector (const parserCandidate *candidates, int n_candidates)
1137 {
1138     Assert (n_candidates > 1);
1139     selectLanguage *selector;
1140     int i;
1141 
1142     selector = LanguageTable[ candidates[0].lang ].def->selectLanguage;
1143     if (selector == NULL)
1144 	    return NULL;
1145 
1146     while (*selector)
1147     {
1148 	    for (i = 1; i < n_candidates; ++i)
1149 		    if (! hasTheSameSelector (candidates[i].lang, *selector))
1150 			    break;
1151 	    if (i == n_candidates)
1152 		    return *selector;
1153 	    selector++;
1154     }
1155     return NULL;
1156 }
1157 
1158 
1159 /* Calls the selector and returns the integer value of the parser for the
1160  * language associated with the string returned by the selector.
1161  */
1162 static int
pickLanguageBySelection(selectLanguage selector,MIO * input,parserCandidate * candidates,unsigned int nCandidates)1163 pickLanguageBySelection (selectLanguage selector, MIO *input,
1164 						 parserCandidate *candidates,
1165 						 unsigned int nCandidates)
1166 {
1167 	const char *lang;
1168 	langType *cs = xMalloc(nCandidates, langType);
1169 	unsigned int i;
1170 
1171 	for (i = 0; i < nCandidates; i++)
1172 		cs[i] = candidates[i].lang;
1173     lang = selector(input, cs, nCandidates);
1174 	eFree (cs);
1175 
1176     if (lang)
1177     {
1178         verbose ("		selection: %s\n", lang);
1179         return getNamedLanguage(lang, 0);
1180     }
1181     else
1182     {
1183 	verbose ("		no selection\n");
1184         return LANG_IGNORE;
1185     }
1186 }
1187 
compareParsersByName(const void * a,const void * b)1188 static int compareParsersByName (const void *a, const void* b)
1189 {
1190 	const parserDefinition *const *la = a, *const *lb = b;
1191 	return strcasecmp ((*la)->name, (*lb)->name);
1192 }
1193 
sortParserCandidatesBySpecType(const void * a,const void * b)1194 static int sortParserCandidatesBySpecType (const void *a, const void *b)
1195 {
1196 	const parserCandidate *ap = a, *bp = b;
1197 	if (ap->specType > bp->specType)
1198 		return -1;
1199 	else if (ap->specType == bp->specType)
1200 	{
1201 		/* qsort, the function calling this function,
1202 		   doesn't do "stable sort". To make the result of
1203 		   sorting predictable, compare the names of parsers
1204 		   when their specType is the same. */
1205 		parserDefinition *la = LanguageTable [ap->lang].def;
1206 		parserDefinition *lb = LanguageTable [bp->lang].def;
1207 		return compareParsersByName (&la, &lb);
1208 	}
1209 	else
1210 		return 1;
1211 }
1212 
/* Sort CANDIDATES in place with sortParserCandidatesBySpecType() and return
   how many of them share the specType of the first element after sorting.
   Arrays with fewer than two elements are left untouched and their length
   is returned. */
static unsigned int sortAndFilterParserCandidates (parserCandidate  *candidates,
						   unsigned int n_candidates)
{
	if (n_candidates >= 2)
	{
		enum specType top;
		unsigned int kept = 0;
		unsigned int n;

		qsort (candidates, n_candidates, sizeof(*candidates),
		       sortParserCandidatesBySpecType);

		top = candidates [0].specType;
		for (n = 0; n < n_candidates; n++)
		{
			if (candidates[n].specType == top)
				kept++;
		}
		return kept;
	}
	return n_candidates;
}
1235 
/* In verbose mode, print HEADER with the candidate count, followed by one
   line per candidate: index, parser name, spec type name and spec string. */
static void verboseReportCandidate (const char *header,
				    parserCandidate  *candidates,
				    unsigned int n_candidates)
{
	unsigned int n;

	verbose ("		#%s: %u\n", header, n_candidates);
	for (n = 0; n < n_candidates; n++)
	{
		const parserCandidate *const c = candidates + n;

		verbose ("			%u: %s (%s: \"%s\")\n",
			 n,
			 LanguageTable[c->lang].def->name,
			 specTypeName [c->specType],
			 c->spec);
	}
}
1249 
doesCandidatesRequireMemoryStream(const parserCandidate * candidates,int n_candidates)1250 static bool doesCandidatesRequireMemoryStream(const parserCandidate *candidates,
1251 						 int n_candidates)
1252 {
1253 	int i;
1254 
1255 	for (i = 0; i < n_candidates; i++)
1256 		if (doesParserRequireMemoryStream (candidates[i].lang))
1257 			return true;
1258 
1259 	return false;
1260 }
1261 
getSpecLanguageCommon(const char * const spec,struct getLangCtx * glc,unsigned int nominate (const char * const,parserCandidate **),langType * fallback)1262 static langType getSpecLanguageCommon (const char *const spec, struct getLangCtx *glc,
1263 				       unsigned int nominate (const char *const, parserCandidate**),
1264 				       langType *fallback)
1265 {
1266 	langType language;
1267 	parserCandidate  *candidates;
1268 	unsigned int n_candidates;
1269 
1270 	if (fallback)
1271 		*fallback = LANG_IGNORE;
1272 
1273 	n_candidates = (*nominate)(spec, &candidates);
1274 	verboseReportCandidate ("candidates",
1275 				candidates, n_candidates);
1276 
1277 	n_candidates = sortAndFilterParserCandidates (candidates, n_candidates);
1278 	verboseReportCandidate ("candidates after sorting and filtering",
1279 				candidates, n_candidates);
1280 
1281 	if (n_candidates == 1)
1282 	{
1283 		language = candidates[0].lang;
1284 	}
1285 	else if (n_candidates > 1)
1286 	{
1287 		selectLanguage selector = commonSelector(candidates, n_candidates);
1288 		bool memStreamRequired = doesCandidatesRequireMemoryStream (candidates,
1289 									       n_candidates);
1290 
1291 		GLC_FOPEN_IF_NECESSARY(glc, fopen_error, memStreamRequired);
1292 		if (selector) {
1293 			verbose ("	selector: %p\n", selector);
1294 			language = pickLanguageBySelection(selector, glc->input, candidates, n_candidates);
1295 		} else {
1296 			verbose ("	selector: NONE\n");
1297 		fopen_error:
1298 			language = LANG_IGNORE;
1299 		}
1300 
1301 		Assert(language != LANG_AUTO);
1302 
1303 		if (fallback)
1304 			*fallback = candidates[0].lang;
1305 	}
1306 	else
1307 	{
1308 		language = LANG_IGNORE;
1309 	}
1310 
1311 	eFree(candidates);
1312 	candidates = NULL;
1313 
1314 	return language;
1315 }
1316 
/* getSpecLanguageCommon() with candidates nominated by
   nominateLanguageCandidates(). */
static langType getSpecLanguage (const char *const spec,
                                 struct getLangCtx *glc,
				 langType *fallback)
{
	const langType language =
		getSpecLanguageCommon (spec, glc, nominateLanguageCandidates, fallback);

	return language;
}
1324 
/* getSpecLanguageCommon() with candidates nominated for BASENAME by
   nominateLanguageCandidatesForPattern(). */
static langType getPatternLanguage (const char *const baseName,
                                    struct getLangCtx *glc,
				    langType *fallback)
{
	const langType language =
		getSpecLanguageCommon (baseName, glc,
				       nominateLanguageCandidatesForPattern,
				       fallback);

	return language;
}
1333 
1334 /* This function tries to figure out language contained in a file by
1335  * running a series of tests, trying to find some clues in the file.
1336  */
1337 static langType
tasteLanguage(struct getLangCtx * glc,const struct taster * const tasters,int n_tasters,langType * fallback)1338 tasteLanguage (struct getLangCtx *glc, const struct taster *const tasters, int n_tasters,
1339 	      langType *fallback)
1340 {
1341     int i;
1342 
1343     if (fallback)
1344 	    *fallback = LANG_IGNORE;
1345     for (i = 0; i < n_tasters; ++i) {
1346         langType language;
1347         vString* spec;
1348 
1349         mio_rewind(glc->input);
1350 	spec = tasters[i].taste(glc->input);
1351 
1352         if (NULL != spec) {
1353             verbose ("	%s: %s\n", tasters[i].msg, vStringValue (spec));
1354             language = getSpecLanguage (vStringValue (spec), glc,
1355 					(fallback && (*fallback == LANG_IGNORE))? fallback: NULL);
1356             vStringDelete (spec);
1357             if (language != LANG_IGNORE)
1358                 return language;
1359         }
1360     }
1361 
1362     return LANG_IGNORE;
1363 }
1364 
1365 
1366 struct GetLanguageRequest {
1367 	enum { GLR_OPEN, GLR_DISCARD, GLR_REUSE, } type;
1368 	const char *const fileName;
1369 	MIO *mio;
1370 	time_t mtime;
1371 };
1372 
1373 static langType
getFileLanguageForRequestInternal(struct GetLanguageRequest * req)1374 getFileLanguageForRequestInternal (struct GetLanguageRequest *req)
1375 {
1376 	const char *const fileName = req->fileName;
1377     langType language;
1378 
1379     /* ctags tries variety ways(HINTS) to choose a proper language
1380        for given fileName. If multiple candidates are chosen in one of
1381        the hint, a SELECTOR common between the candidate languages
1382        is called.
1383 
1384        "selection failure" means a selector common between the
1385        candidates doesn't exist or the common selector returns NULL.
1386 
1387        "hint failure" means the hint finds no candidate or
1388        "selection failure" occurs though the hint finds multiple
1389        candidates.
1390 
1391        If a hint chooses multiple candidates, and selection failure is
1392        occurred, the hint records one of the candidates as FALLBACK for
1393        the hint. (The candidates are stored in an array. The first
1394        element of the array is recorded. However, there is no
1395        specification about the order of elements in the array.)
1396 
1397        If all hints are failed, FALLBACKs of the hints are examined.
1398        Which fallbacks should be chosen?  `enum hint' defines the order. */
1399     enum hint {
1400 	    HINT_INTERP,
1401 	    HINT_OTHER,
1402 	    HINT_FILENAME,
1403 	    HINT_TEMPLATE,
1404 	    N_HINTS,
1405     };
1406     langType fallback[N_HINTS];
1407     int i;
1408     struct getLangCtx glc = {
1409         .fileName = fileName,
1410         .input    = (req->type == GLR_REUSE)? mio_ref (req->mio): NULL,
1411         .err      = false,
1412     };
1413     const char* const baseName = baseFilename (fileName);
1414     char *templateBaseName = NULL;
1415     fileStatus *fstatus = NULL;
1416 
1417     for (i = 0; i < N_HINTS; i++)
1418 	fallback [i] = LANG_IGNORE;
1419 
1420     verbose ("Get file language for %s\n", fileName);
1421 
1422     verbose ("	pattern: %s\n", baseName);
1423     language = getPatternLanguage (baseName, &glc,
1424 				   fallback + HINT_FILENAME);
1425     if (language != LANG_IGNORE || glc.err)
1426         goto cleanup;
1427 
1428     {
1429         const char* const tExt = ".in";
1430         templateBaseName = baseFilenameSansExtensionNew (fileName, tExt);
1431         if (templateBaseName)
1432         {
1433             verbose ("	pattern + template(%s): %s\n", tExt, templateBaseName);
1434             GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false);
1435             mio_rewind(glc.input);
1436             language = getPatternLanguage(templateBaseName, &glc,
1437 					  fallback + HINT_TEMPLATE);
1438             if (language != LANG_IGNORE)
1439                 goto cleanup;
1440         }
1441     }
1442 
1443 	/* If the input is already opened, we don't have to verify the existence. */
1444     if (glc.input || ((fstatus = eStat (fileName)) && fstatus->exists))
1445     {
1446 	    if ((fstatus && fstatus->isExecutable) || Option.guessLanguageEagerly)
1447 	    {
1448 		    GLC_FOPEN_IF_NECESSARY (&glc, cleanup, false);
1449 		    language = tasteLanguage(&glc, eager_tasters, 1,
1450 					    fallback + HINT_INTERP);
1451 	    }
1452 	    if (language != LANG_IGNORE)
1453 		    goto cleanup;
1454 
1455 	    if (Option.guessLanguageEagerly)
1456 	    {
1457 		    GLC_FOPEN_IF_NECESSARY(&glc, cleanup, false);
1458 		    language = tasteLanguage(&glc,
1459 					     eager_tasters + 1,
1460 					     ARRAY_SIZE(eager_tasters) - 1,
1461 					     fallback + HINT_OTHER);
1462 	    }
1463     }
1464 
1465 
1466   cleanup:
1467 	if (req->type == GLR_OPEN && glc.input)
1468 	{
1469 		req->mio = mio_ref (glc.input);
1470 		if (!fstatus)
1471 			fstatus = eStat (fileName);
1472 		if (fstatus)
1473 			req->mtime = fstatus->mtime;
1474 	}
1475     GLC_FCLOSE(&glc);
1476     if (fstatus)
1477 	    eStatFree (fstatus);
1478     if (templateBaseName)
1479         eFree (templateBaseName);
1480 
1481     for (i = 0;
1482 	 language == LANG_IGNORE && i < N_HINTS;
1483 	 i++)
1484     {
1485         language = fallback [i];
1486 	if (language != LANG_IGNORE)
1487         verbose ("	fallback[hint = %d]: %s\n", i, getLanguageName (language));
1488     }
1489 
1490 	if (language == LANG_IGNORE
1491 		&& isLanguageEnabled (LANG_FALLBACK))
1492 	{
1493 		language = LANG_FALLBACK;
1494 		verbose ("	last resort: using \"%s\" parser\n",
1495 				 getLanguageName (LANG_FALLBACK));
1496 	}
1497     return language;
1498 }
1499 
getFileLanguageForRequest(struct GetLanguageRequest * req)1500 static langType getFileLanguageForRequest (struct GetLanguageRequest *req)
1501 {
1502 	langType l = Option.language;
1503 
1504 	if (l == LANG_AUTO)
1505 		return getFileLanguageForRequestInternal(req);
1506 	else if (! isLanguageEnabled (l))
1507 	{
1508 		error (FATAL,
1509 		       "%s parser specified with --language-force is disabled",
1510 		       getLanguageName (l));
1511 		/* For suppressing warnings. */
1512 		return LANG_AUTO;
1513 	}
1514 	else
1515 		return Option.language;
1516 }
1517 
getLanguageForFilenameAndContents(const char * const fileName)1518 extern langType getLanguageForFilenameAndContents (const char *const fileName)
1519 {
1520 	struct GetLanguageRequest req = {
1521 		.type = GLR_DISCARD,
1522 		.fileName = fileName,
1523 		.mtime = (time_t)0,
1524 	};
1525 
1526 	return getFileLanguageForRequest (&req);
1527 }
1528 
1529 typedef void (*languageCallback)  (langType language, void* user_data);
foreachLanguage(languageCallback callback,void * user_data)1530 static void foreachLanguage(languageCallback callback, void *user_data)
1531 {
1532 	langType result = LANG_IGNORE;
1533 
1534 	unsigned int i;
1535 	for (i = 0  ;  i < LanguageCount  &&  result == LANG_IGNORE  ;  ++i)
1536 	{
1537 		const parserDefinition* const lang = LanguageTable [i].def;
1538 		if (lang->name != NULL)
1539 			callback(i, user_data);
1540 	}
1541 }
1542 
printLanguageMap(const langType language,FILE * fp)1543 static void printLanguageMap (const langType language, FILE *fp)
1544 {
1545 	bool first = true;
1546 	unsigned int i;
1547 	parserObject *parser = LanguageTable + language;
1548 	stringList* map = parser->currentPatterns;
1549 	Assert (0 <= language  &&  language < (int) LanguageCount);
1550 	for (i = 0  ;  map != NULL  &&  i < stringListCount (map)  ;  ++i)
1551 	{
1552 		fprintf (fp, "%s(%s)", (first ? "" : " "),
1553 			 vStringValue (stringListItem (map, i)));
1554 		first = false;
1555 	}
1556 	map = parser->currentExtensions;
1557 	for (i = 0  ;  map != NULL  &&  i < stringListCount (map)  ;  ++i)
1558 	{
1559 		fprintf (fp, "%s.%s", (first ? "" : " "),
1560 			 vStringValue (stringListItem (map, i)));
1561 		first = false;
1562 	}
1563 }
1564 
installLanguageMapDefault(const langType language)1565 extern void installLanguageMapDefault (const langType language)
1566 {
1567 	parserObject* parser;
1568 	Assert (0 <= language  &&  language < (int) LanguageCount);
1569 	parser = LanguageTable + language;
1570 	if (parser->currentPatterns != NULL)
1571 		stringListDelete (parser->currentPatterns);
1572 	if (parser->currentExtensions != NULL)
1573 		stringListDelete (parser->currentExtensions);
1574 
1575 	if (parser->def->patterns == NULL)
1576 		parser->currentPatterns = stringListNew ();
1577 	else
1578 	{
1579 		parser->currentPatterns =
1580 			stringListNewFromArgv (parser->def->patterns);
1581 	}
1582 	if (parser->def->extensions == NULL)
1583 		parser->currentExtensions = stringListNew ();
1584 	else
1585 	{
1586 		parser->currentExtensions =
1587 			stringListNewFromArgv (parser->def->extensions);
1588 	}
1589 	BEGIN_VERBOSE(vfp);
1590 	{
1591 	printLanguageMap (language, vfp);
1592 	putc ('\n', vfp);
1593 	}
1594 	END_VERBOSE();
1595 }
1596 
installLanguageMapDefaults(void)1597 extern void installLanguageMapDefaults (void)
1598 {
1599 	unsigned int i;
1600 	for (i = 0  ;  i < LanguageCount  ;  ++i)
1601 	{
1602 		verbose ("    %s: ", getLanguageName (i));
1603 		installLanguageMapDefault (i);
1604 	}
1605 }
1606 
installLanguageAliasesDefault(const langType language)1607 extern void installLanguageAliasesDefault (const langType language)
1608 {
1609 	parserObject* parser;
1610 	Assert (0 <= language  &&  language < (int) LanguageCount);
1611 	parser = LanguageTable + language;
1612 	if (parser->currentAliases != NULL)
1613 		stringListDelete (parser->currentAliases);
1614 
1615 	if (parser->def->aliases == NULL)
1616 		parser->currentAliases = stringListNew ();
1617 	else
1618 	{
1619 		parser->currentAliases =
1620 			stringListNewFromArgv (parser->def->aliases);
1621 	}
1622 	BEGIN_VERBOSE(vfp);
1623 	if (parser->currentAliases != NULL)
1624 		for (unsigned int i = 0  ;  i < stringListCount (parser->currentAliases)  ;  ++i)
1625 			fprintf (vfp, " %s", vStringValue (
1626 						 stringListItem (parser->currentAliases, i)));
1627 	putc ('\n', vfp);
1628 	END_VERBOSE();
1629 }
1630 
installLanguageAliasesDefaults(void)1631 extern void installLanguageAliasesDefaults (void)
1632 {
1633 	unsigned int i;
1634 	for (i = 0  ;  i < LanguageCount  ;  ++i)
1635 	{
1636 		verbose ("    %s: ", getLanguageName (i));
1637 		installLanguageAliasesDefault (i);
1638 	}
1639 }
1640 
clearLanguageMap(const langType language)1641 extern void clearLanguageMap (const langType language)
1642 {
1643 	Assert (0 <= language  &&  language < (int) LanguageCount);
1644 	stringListClear ((LanguageTable + language)->currentPatterns);
1645 	stringListClear ((LanguageTable + language)->currentExtensions);
1646 }
1647 
clearLanguageAliases(const langType language)1648 extern void clearLanguageAliases (const langType language)
1649 {
1650 	Assert (0 <= language  &&  language < (int) LanguageCount);
1651 
1652 	parserObject* parser = (LanguageTable + language);
1653 	if (parser->currentAliases)
1654 		stringListClear (parser->currentAliases);
1655 }
1656 
removeLanguagePatternMap1(const langType language,const char * const pattern)1657 static bool removeLanguagePatternMap1(const langType language, const char *const pattern)
1658 {
1659 	bool result = false;
1660 	stringList* const ptrn = (LanguageTable + language)->currentPatterns;
1661 
1662 	if (ptrn != NULL && stringListDeleteItemExtension (ptrn, pattern))
1663 	{
1664 		verbose (" (removed from %s)", getLanguageName (language));
1665 		result = true;
1666 	}
1667 	return result;
1668 }
1669 
removeLanguagePatternMap(const langType language,const char * const pattern)1670 extern bool removeLanguagePatternMap (const langType language, const char *const pattern)
1671 {
1672 	bool result = false;
1673 
1674 	if (language == LANG_AUTO)
1675 	{
1676 		unsigned int i;
1677 		for (i = 0  ;  i < LanguageCount  &&  ! result ;  ++i)
1678 			result = removeLanguagePatternMap1 (i, pattern) || result;
1679 	}
1680 	else
1681 		result = removeLanguagePatternMap1 (language, pattern);
1682 	return result;
1683 }
1684 
addLanguagePatternMap(const langType language,const char * ptrn,bool exclusiveInAllLanguages)1685 extern void addLanguagePatternMap (const langType language, const char* ptrn,
1686 				   bool exclusiveInAllLanguages)
1687 {
1688 	vString* const str = vStringNewInit (ptrn);
1689 	parserObject* parser;
1690 	Assert (0 <= language  &&  language < (int) LanguageCount);
1691 	parser = LanguageTable + language;
1692 	if (exclusiveInAllLanguages)
1693 		removeLanguagePatternMap (LANG_AUTO, ptrn);
1694 	stringListAdd (parser->currentPatterns, str);
1695 }
1696 
removeLanguageExtensionMap1(const langType language,const char * const extension)1697 static bool removeLanguageExtensionMap1 (const langType language, const char *const extension)
1698 {
1699 	bool result = false;
1700 	stringList* const exts = (LanguageTable + language)->currentExtensions;
1701 
1702 	if (exts != NULL  &&  stringListDeleteItemExtension (exts, extension))
1703 	{
1704 		verbose (" (removed from %s)", getLanguageName (language));
1705 		result = true;
1706 	}
1707 	return result;
1708 }
1709 
removeLanguageExtensionMap(const langType language,const char * const extension)1710 extern bool removeLanguageExtensionMap (const langType language, const char *const extension)
1711 {
1712 	bool result = false;
1713 
1714 	if (language == LANG_AUTO)
1715 	{
1716 		unsigned int i;
1717 		for (i = 0  ;  i < LanguageCount ;  ++i)
1718 			result = removeLanguageExtensionMap1 (i, extension) || result;
1719 	}
1720 	else
1721 		result = removeLanguageExtensionMap1 (language, extension);
1722 	return result;
1723 }
1724 
addLanguageExtensionMap(const langType language,const char * extension,bool exclusiveInAllLanguages)1725 extern void addLanguageExtensionMap (
1726 		const langType language, const char* extension,
1727 		bool exclusiveInAllLanguages)
1728 {
1729 	vString* const str = vStringNewInit (extension);
1730 	Assert (0 <= language  &&  language < (int) LanguageCount);
1731 	if (exclusiveInAllLanguages)
1732 		removeLanguageExtensionMap (LANG_AUTO, extension);
1733 	stringListAdd ((LanguageTable + language)->currentExtensions, str);
1734 }
1735 
addLanguageAlias(const langType language,const char * alias)1736 extern void addLanguageAlias (const langType language, const char* alias)
1737 {
1738 	vString* const str = vStringNewInit (alias);
1739 	parserObject* parser;
1740 	Assert (0 <= language  &&  language < (int) LanguageCount);
1741 	parser = LanguageTable + language;
1742 	if (parser->currentAliases == NULL)
1743 		parser->currentAliases = stringListNew ();
1744 	stringListAdd (parser->currentAliases, str);
1745 }
1746 
enableLanguage(const langType language,const bool state)1747 extern void enableLanguage (const langType language, const bool state)
1748 {
1749 	Assert (0 <= language  &&  language < (int) LanguageCount);
1750 	LanguageTable [language].def->enabled = state;
1751 }
1752 
#ifdef DO_TRACING
/* Set the `traced' flag of LANGUAGE's parser definition. */
extern void traceLanguage (langType language)
{
	Assert (0 <= language  &&  language < (int) LanguageCount);
	(LanguageTable + language)->def->traced = true;
}

/* Return the `traced' flag of LANGUAGE's parser definition. */
extern bool isLanguageTraced (langType language)
{
	Assert (0 <= language  &&  language < (int) LanguageCount);
	return (LanguageTable + language)->def->traced;
}
#endif /* DO_TRACING */
1765 
enableLanguages(const bool state)1766 extern void enableLanguages (const bool state)
1767 {
1768 	unsigned int i;
1769 	for (i = 0  ;  i < LanguageCount  ;  ++i)
1770 		enableLanguage (i, state);
1771 }
1772 
installFieldDefinition(const langType language)1773 static void installFieldDefinition (const langType language)
1774 {
1775 	unsigned int i;
1776 	parserDefinition * parser;
1777 
1778 	Assert (0 <= language  &&  language < (int) LanguageCount);
1779 	parser = LanguageTable [language].def;
1780 
1781 	if (parser->fieldTable != NULL)
1782 	{
1783 		for (i = 0; i < parser->fieldCount; i++)
1784 			defineField (& parser->fieldTable [i], language);
1785 	}
1786 }
1787 
installXtagDefinition(const langType language)1788 static void installXtagDefinition (const langType language)
1789 {
1790 	unsigned int i;
1791 	parserDefinition * parser;
1792 
1793 	Assert (0 <= language  &&  language < (int) LanguageCount);
1794 	parser = LanguageTable [language].def;
1795 
1796 	if (parser->xtagTable != NULL)
1797 	{
1798 		for (i = 0; i < parser->xtagCount; i++)
1799 			defineXtag (& parser->xtagTable [i], language);
1800 	}
1801 }
1802 
/* Initialize the parser for LANG on first use: install its keyword, xpath,
   field, xtag and regex tables, run its `initialize' hook and initialize
   its dependencies.  Later calls only re-run the hook when it is
   lazyInitialize (see below). */
static void initializeParserOne (langType lang)
{
	parserObject *const parser = LanguageTable + lang;

	if (parser->initialized)
	{
		/* lazyInitialize() installs findRegexTags() to parser->parser.
		   findRegexTags() should be installed to a parser if the parser
		   is optlib based (created by --langdef) and has some regex
		   patterns (defined with --regex-<LANG>). findRegexTags() makes
		   regex matching work.

		   A parser can be initialized while options are evaluated, by
		   --fields-<LANG>=+{something} for example. If such an option is
		   evaluated first, evaluating a later --regex-<LANG>=... option
		   doesn't install findRegexTags(), and as a result regex
		   matching doesn't work: lazyInitialize() was called only once,
		   when --fields-<LANG>=+{something} was evaluated, and at that
		   time ctags had not yet seen --regex-<LANG>=.... Even though
		   ctags saw --regex-<LANG>=... after initializing, there was no
		   chance to install findRegexTags() to parser->parser.

		   The following code gives extra chances to call
		   lazyInitialize(), which installs findRegexTags() to
		   parser->parser. */
		if (parser->def->initialize == lazyInitialize)
			parser->def->initialize (lang);
		return;
	}

	verbose ("Initialize parser: %s\n", parser->def->name);
	parser->initialized = true;

	installKeywordTable (lang);
	installTagXpathTable (lang);
	installFieldDefinition (lang);
	installXtagDefinition (lang);

	/* Regex definitions refer to xtag definitions, so the regex table
	   must be installed after the xtag definitions. */
	installTagRegexTable (lang);

	if (parser->def->initialize != NULL)
		parser->def->initialize (lang);

	initializeDependencies (parser->def, parser->slaveControlBlock);

	Assert (parser->fileKind != NULL);
	Assert (!doesParserUseKind (parser->kindControlBlock, parser->fileKind->letter));
}
1854 
/* Initialize the parser for LANG, or every parser when LANG is LANG_AUTO. */
extern void initializeParser (langType lang)
{
	unsigned int n;

	if (lang != LANG_AUTO)
	{
		initializeParserOne (lang);
		return;
	}

	for (n = 0; n < countParsers(); n++)
		initializeParserOne (n);
}
1866 
/* Link every dependency declared by PARSER.
 *
 * For a DEPTYPE_FOREIGNER dependency PARSER itself takes the upper role and
 * the language named by the dependency takes the lower role.  For every
 * other type the named language is the upper parser and PARSER is the lower
 * one.  A dependency naming an unknown language is a fatal error in both
 * cases.
 */
static void linkDependenciesAtInitializeParsing (parserDefinition *const parser)
{
	unsigned int i;
	parserDependency *d;
	langType upper;
	parserDefinition *lowerParser;
	parserObject *upperParser;

	for (i = 0; i < parser->dependencyCount; i++)
	{
		d = parser->dependencies + i;

		if (d->type == DEPTYPE_FOREIGNER)
		{
			upper = parser->id;
			langType lower = getNamedLanguage (d->upperParser, 0);
			if (lower == LANG_IGNORE)
				error (FATAL,
					   "Unknown language: \"%s\" as a foreigner for %s",
					   d->upperParser, parser->name);

			lowerParser = LanguageTable [lower].def;
		}
		else
		{
			upper = getNamedLanguage (d->upperParser, 0);
			/* Without this check an unknown name would be used as a
			   negative index into LanguageTable below. */
			if (upper == LANG_IGNORE)
				error (FATAL,
					   "Unknown language: \"%s\" as an upper parser for %s",
					   d->upperParser, parser->name);

			lowerParser = parser;
		}

		upperParser = LanguageTable + upper;

		linkDependencyAtInitializeParsing (d->type, upperParser->def,
										   upperParser->slaveControlBlock,
										   upperParser->kindControlBlock,
										   lowerParser,
										   (LanguageTable + lowerParser->id)->kindControlBlock,
										   d->data);
	}
}
1906 
1907 /* Used in both builtin and optlib parsers. */
initializeParsingCommon(parserDefinition * def,bool is_builtin)1908 static void initializeParsingCommon (parserDefinition *def, bool is_builtin)
1909 {
1910 	parserObject *parser;
1911 
1912 	if (is_builtin)
1913 		verbose ("%s%s", LanguageCount > 0 ? ", " : "", def->name);
1914 	else
1915 		verbose ("Add optlib parser: %s\n", def->name);
1916 
1917 	def->id = LanguageCount++;
1918 	parser = LanguageTable + def->id;
1919 	parser->def = def;
1920 
1921 	hashTablePutItem (LanguageHTable, def->name, def);
1922 
1923 	parser->fileKind = &defaultFileKind;
1924 
1925 	parser->kindControlBlock  = allocKindControlBlock (def);
1926 	parser->slaveControlBlock = allocSlaveControlBlock (def);
1927 	parser->lregexControlBlock = allocLregexControlBlock (def);
1928 }
1929 
initializeParsing(void)1930 extern void initializeParsing (void)
1931 {
1932 	unsigned int builtInCount;
1933 	unsigned int i;
1934 
1935 	builtInCount = ARRAY_SIZE (BuiltInParsers);
1936 	LanguageTable = xMalloc (builtInCount, parserObject);
1937 	memset(LanguageTable, 0, builtInCount * sizeof (parserObject));
1938 	for (i = 0; i < builtInCount; ++i)
1939 	{
1940 		LanguageTable [i].pretendingAsLanguage = LANG_IGNORE;
1941 		LanguageTable [i].pretendedAsLanguage = LANG_IGNORE;
1942 	}
1943 
1944 	LanguageHTable = hashTableNew (127,
1945 								   hashCstrcasehash,
1946 								   hashCstrcaseeq,
1947 								   NULL,
1948 								   NULL);
1949 	DEFAULT_TRASH_BOX(LanguageHTable, hashTableDelete);
1950 
1951 	verbose ("Installing parsers: ");
1952 	for (i = 0  ;  i < builtInCount  ;  ++i)
1953 	{
1954 		parserDefinition* const def = (*BuiltInParsers [i]) ();
1955 		if (def != NULL)
1956 		{
1957 			Assert (def->name);
1958 			Assert (def->name[0] != '\0');
1959 			Assert (strcmp (def->name, RSV_LANG_ALL));
1960 			Assert (strpbrk (def->name, "!\"$%&'()*,-./:;<=>?@[\\]^`|~") == NULL);
1961 
1962 			if (def->method & METHOD_NOT_CRAFTED)
1963 				def->parser = findRegexTags;
1964 			else
1965 				/* parser definition must define one and only one parsing routine */
1966 				Assert ((!!def->parser) + (!!def->parser2) == 1);
1967 
1968 			initializeParsingCommon (def, true);
1969 		}
1970 	}
1971 	verbose ("\n");
1972 
1973 	for (i = 0; i < builtInCount  ;  ++i)
1974 		linkDependenciesAtInitializeParsing (LanguageTable [i].def);
1975 }
1976 
freeParserResources(void)1977 extern void freeParserResources (void)
1978 {
1979 	unsigned int i;
1980 	for (i = 0  ;  i < LanguageCount  ;  ++i)
1981 	{
1982 		parserObject* const parser = LanguageTable + i;
1983 
1984 		if (parser->def->finalize)
1985 			(parser->def->finalize)((langType)i, (bool)parser->initialized);
1986 
1987 		uninstallTagXpathTable (i);
1988 
1989 		freeLregexControlBlock (parser->lregexControlBlock);
1990 		freeKindControlBlock (parser->kindControlBlock);
1991 		parser->kindControlBlock = NULL;
1992 
1993 		finalizeDependencies (parser->def, parser->slaveControlBlock);
1994 		freeSlaveControlBlock (parser->slaveControlBlock);
1995 		parser->slaveControlBlock = NULL;
1996 
1997 		freeList (&parser->currentPatterns);
1998 		freeList (&parser->currentExtensions);
1999 		freeList (&parser->currentAliases);
2000 
2001 		eFree (parser->def->name);
2002 		parser->def->name = NULL;
2003 		eFree (parser->def);
2004 		parser->def = NULL;
2005 	}
2006 	if (LanguageTable != NULL)
2007 		eFree (LanguageTable);
2008 	LanguageTable = NULL;
2009 	LanguageCount = 0;
2010 }
2011 
/* Parsing routine that produces nothing; lazyInitialize() installs it for a
 * parser whose method does not include METHOD_REGEX. */
static void doNothing (void)
{
	/* intentionally empty */
}
2015 
/* Parsing routine installed by lazyInitialize() for a dedicated optlib
 * subparser: it only schedules the base parser.
 * NOTE(review): the argument 0 presumably selects the first (and only)
 * dependency of the optlib parser -- confirm against the callee. */
static void optlibRunBaseParser (void)
{
	scheduleRunningBaseparser (0);
}
2020 
/* Return true when the first dependency of DEF is a DEPTYPE_SUBPARSER one
 * whose direction has the SUBPARSER_SUB_RUNS_BASE bit set.  A definition
 * without dependencies is never a dedicated subparser. */
static bool optlibIsDedicatedSubparser (parserDefinition* def)
{
	const subparser *sub;

	if (!def->dependencies)
		return false;
	if (def->dependencies->type != DEPTYPE_SUBPARSER)
		return false;

	sub = (subparser *)def->dependencies->data;
	return (sub->direction & SUBPARSER_SUB_RUNS_BASE) != 0;
}
2027 
/* Select the parsing routine of LANGUAGE according to its method flags:
 *  - no METHOD_REGEX:                      doNothing
 *  - METHOD_REGEX, dedicated subparser:    optlibRunBaseParser
 *  - METHOD_REGEX otherwise:               findRegexTags */
static void lazyInitialize (langType language)
{
	parserDefinition* target;

	Assert (0 <= language  &&  language < (int) LanguageCount);
	target = LanguageTable [language].def;

	if (!(target->method & METHOD_REGEX))
		target->parser = doNothing;
	else if (optlibIsDedicatedSubparser (target))
		target->parser = optlibRunBaseParser;
	else
		target->parser = findRegexTags;
}
2045 
enableDefaultFileKind(bool state)2046 extern void enableDefaultFileKind (bool state)
2047 {
2048 	defaultFileKind.enabled = state;
2049 }
2050 
2051 /*
2052 *   Option parsing
2053 */
2054 struct preLangDefFlagData
2055 {
2056 	char *base;
2057 	subparserRunDirection direction;
2058 	bool autoFQTag;
2059 };
2060 
pre_lang_def_flag_base_long(const char * const optflag,const char * const param,void * data)2061 static void pre_lang_def_flag_base_long (const char* const optflag, const char* const param, void* data)
2062 {
2063 	struct preLangDefFlagData * flag_data = data;
2064 	langType base;
2065 
2066 	if (param[0] == '\0')
2067 	{
2068 		error (WARNING, "No base parser specified for \"%s\" flag of --langdef option", optflag);
2069 		return;
2070 	}
2071 
2072 	base = getNamedLanguage (param, 0);
2073 	if (base == LANG_IGNORE)
2074 	{
2075 		error (WARNING, "Unknown language(%s) is specified for \"%s\" flag of --langdef option",
2076 			   param, optflag);
2077 		return;
2078 
2079 	}
2080 
2081 	langType cpreproc = getNamedLanguage ("CPreProcessor", 0);
2082 	if (base == cpreproc)
2083 	{
2084 		error (WARNING,
2085 			   "Because of an internal limitation, Making a sub parser based on the CPreProcessor parser is not allowed: %s",
2086 			   param);
2087 		return;
2088 	}
2089 
2090 	flag_data->base = eStrdup(param);
2091 }
2092 
2093 #define LANGDEF_FLAG_DEDICATED "dedicated"
2094 #define LANGDEF_FLAG_SHARED    "shared"
2095 #define LANGDEF_FLAG_BIDIR     "bidirectional"
pre_lang_def_flag_direction_long(const char * const optflag,const char * const param CTAGS_ATTR_UNUSED,void * data)2096 static void pre_lang_def_flag_direction_long (const char* const optflag, const char* const param CTAGS_ATTR_UNUSED, void* data)
2097 {
2098 	struct preLangDefFlagData * flag_data = data;
2099 
2100 	if (strcmp(optflag, LANGDEF_FLAG_DEDICATED) == 0)
2101 		flag_data->direction = SUBPARSER_SUB_RUNS_BASE;
2102 	else if (strcmp(optflag, LANGDEF_FLAG_SHARED) == 0)
2103 		flag_data->direction = SUBPARSER_BASE_RUNS_SUB;
2104 	else if (strcmp(optflag, LANGDEF_FLAG_BIDIR) == 0)
2105 		flag_data->direction = SUBPARSER_BI_DIRECTION;
2106 	else
2107 		AssertNotReached ();
2108 }
2109 
/* Handler of the "_autoFQTag" long flag of --langdef: records the request in
 * DATA (a struct preLangDefFlagData).  The flag takes no value. */
static void pre_lang_def_flag_autoFQTag_long (const char* const optflag,
											  const char* const param CTAGS_ATTR_UNUSED,
											  void* data)
{
	((struct preLangDefFlagData *) data)->autoFQTag = true;
}
2117 
2118 static flagDefinition PreLangDefFlagDef [] = {
2119 	{ '\0',  "base", NULL, pre_lang_def_flag_base_long,
2120 	  "BASEPARSER", "utilize as a base parser"},
2121 	{ '\0',  LANGDEF_FLAG_DEDICATED,  NULL,
2122 	  pre_lang_def_flag_direction_long,
2123 	  NULL, "make the base parser dedicated to this subparser"},
2124 	{ '\0',  LANGDEF_FLAG_SHARED,     NULL,
2125 	  pre_lang_def_flag_direction_long,
2126 	  NULL, "share the base parser with the other subparsers"
2127 	},
2128 	{ '\0',  LANGDEF_FLAG_BIDIR,      NULL,
2129 	  pre_lang_def_flag_direction_long,
2130 	  NULL, "utilize the base parser both 'dedicated' and 'shared' way"
2131 	},
2132 	{ '\0',  "_autoFQTag", NULL, pre_lang_def_flag_autoFQTag_long,
2133 	  NULL, "make full qualified tags automatically based on scope information"},
2134 };
2135 
/* Finalize hook that OptlibParser() installs on a parser with a base parser:
 * frees the single heap-allocated dependency together with its upper parser
 * name and its data.  Does nothing unless the parser has exactly one
 * dependency. */
static void optlibFreeDep (langType lang, bool initialized CTAGS_ATTR_UNUSED)
{
	parserDefinition *const owner = LanguageTable [lang].def;
	parserDependency *dep;

	if (owner->dependencyCount != 1)
		return;

	dep = owner->dependencies;

	eFree ((char *)dep->upperParser); /* Dirty cast */
	dep->upperParser = NULL;

	eFree (dep->data);
	dep->data = NULL;

	eFree (dep);
	owner->dependencies = NULL;
}
2152 
OptlibParser(const char * name,const char * base,subparserRunDirection direction)2153 static parserDefinition* OptlibParser(const char *name, const char *base,
2154 									  subparserRunDirection direction)
2155 {
2156 	parserDefinition *def;
2157 
2158 	def = parserNew (name);
2159 	def->initialize        = lazyInitialize;
2160 	def->method            = METHOD_NOT_CRAFTED;
2161 	if (base)
2162 	{
2163 		subparser *sub = xCalloc (1, subparser);
2164 		parserDependency *dep = xCalloc (1, parserDependency);
2165 
2166 		sub->direction = direction;
2167 		dep->type = DEPTYPE_SUBPARSER;
2168 		dep->upperParser = eStrdup (base);
2169 		dep->data = sub;
2170 		def->dependencies = dep;
2171 		def->dependencyCount = 1;
2172 		def->finalize = optlibFreeDep;
2173 	}
2174 
2175 	return def;
2176 }
2177 
processLanguageDefineOption(const char * const option,const char * const parameter)2178 extern void processLanguageDefineOption (
2179 		const char *const option, const char *const parameter)
2180 {
2181 	char *name;
2182 	char *flags;
2183 	parserDefinition*  def;
2184 
2185 	flags = strchr (parameter, LONG_FLAGS_OPEN);
2186 	if (flags)
2187 		name = eStrndup (parameter, flags - parameter);
2188 	else
2189 		name = eStrdup (parameter);
2190 
2191 	/* Veirfy that the name of new language is acceptable or not. */
2192 	char *unacceptable;
2193 	if (name [0] == '\0')
2194 	{
2195 		eFree (name);
2196 		error (FATAL, "No language specified for \"%s\" option", option);
2197 	}
2198 	else if (getNamedLanguage (name, 0) != LANG_IGNORE)
2199 	{
2200 		/* name cannot be freed because it is used in the FATAL message. */
2201 		error (FATAL, "Language \"%s\" already defined", name);
2202 	}
2203 	else if (strcmp(name, RSV_LANG_ALL) == 0)
2204 	{
2205 		eFree (name);
2206 		error (FATAL, "\"all\" is reserved; don't use it as the name for defining a new language");
2207 	}
2208 	else if ((unacceptable = strpbrk (name, "!\"$%&'()*,-./:;<=>?@[\\]^`|~")))
2209 	{
2210 		char c = *unacceptable;
2211 
2212 		/* name cannot be freed because it is used in the FATAL message. */
2213 		/* We accept '_'.
2214 		 * We accept # and + because they are already used in C# parser and C++ parser.
2215 		 * {... is already trimmed at the beginning of this function. */
2216 		if ((c == '`') || (c == '\''))
2217 			error (FATAL, "don't use \"%c\" in a language name (%s)", c, name);
2218 		else
2219 			error (FATAL, "don't use `%c' in a language name (%s)", c, name);
2220 	}
2221 
2222 	LanguageTable = xRealloc (LanguageTable, LanguageCount + 1, parserObject);
2223 	memset (LanguageTable + LanguageCount, 0, sizeof(parserObject));
2224 
2225 	struct preLangDefFlagData data = {
2226 		.base = NULL,
2227 		.direction = SUBPARSER_UNKNOWN_DIRECTION,
2228 		.autoFQTag = false,
2229 	};
2230 	flagsEval (flags, PreLangDefFlagDef, ARRAY_SIZE (PreLangDefFlagDef), &data);
2231 
2232 	if (data.base == NULL && data.direction != SUBPARSER_UNKNOWN_DIRECTION)
2233 		error (WARNING, "Ignore the direction of subparser because \"{base=}\" is not given");
2234 
2235 	if (data.base && data.direction == SUBPARSER_UNKNOWN_DIRECTION)
2236 		data.direction = SUBPARSER_BASE_RUNS_SUB;
2237 
2238 	def = OptlibParser (name, data.base, data.direction);
2239 	if (data.base)
2240 		eFree (data.base);
2241 
2242 	def->requestAutomaticFQTag = data.autoFQTag;
2243 
2244 	initializeParsingCommon (def, false);
2245 	linkDependenciesAtInitializeParsing (def);
2246 
2247 	LanguageTable [def->id].currentPatterns = stringListNew ();
2248 	LanguageTable [def->id].currentExtensions = stringListNew ();
2249 	LanguageTable [def->id].pretendingAsLanguage = LANG_IGNORE;
2250 	LanguageTable [def->id].pretendedAsLanguage = LANG_IGNORE;
2251 
2252 	eFree (name);
2253 }
2254 
isLanguageKindEnabled(const langType language,int kindIndex)2255 extern bool isLanguageKindEnabled (const langType language, int kindIndex)
2256 {
2257 	kindDefinition * kdef = getLanguageKind (language, kindIndex);
2258 	return kdef->enabled;
2259 }
2260 
isLanguageRoleEnabled(const langType language,int kindIndex,int roleIndex)2261 extern bool isLanguageRoleEnabled (const langType language, int kindIndex, int roleIndex)
2262 {
2263 	return isRoleEnabled(LanguageTable [language].kindControlBlock,
2264 						 kindIndex, roleIndex);
2265 }
2266 
isLanguageKindRefOnly(const langType language,int kindIndex)2267 extern bool isLanguageKindRefOnly (const langType language, int kindIndex)
2268 {
2269 	kindDefinition * def =  getLanguageKind(language, kindIndex);
2270 	return def->referenceOnly;
2271 }
2272 
resetLanguageKinds(const langType language,const bool mode)2273 static void resetLanguageKinds (const langType language, const bool mode)
2274 {
2275 	const parserObject* parser;
2276 
2277 	Assert (0 <= language  &&  language < (int) LanguageCount);
2278 	parser = LanguageTable + language;
2279 
2280 	{
2281 		unsigned int i;
2282 		struct kindControlBlock *kcb = parser->kindControlBlock;
2283 
2284 		for (i = 0  ;  i < countKinds (kcb)  ;  ++i)
2285 		{
2286 			kindDefinition *kdef = getKind (kcb, i);
2287 			enableKind (kdef, mode);
2288 		}
2289 	}
2290 }
2291 
enableLanguageKindForLetter(const langType language,const int kind,const bool mode)2292 static bool enableLanguageKindForLetter (
2293 		const langType language, const int kind, const bool mode)
2294 {
2295 	bool result = false;
2296 	kindDefinition* const def = getLanguageKindForLetter (language, kind);
2297 	if (def != NULL)
2298 	{
2299 		enableKind (def, mode);
2300 		result = true;
2301 	}
2302 	return result;
2303 }
2304 
enableLanguageKindForName(const langType language,const char * const name,const bool mode)2305 static bool enableLanguageKindForName (
2306 	const langType language, const char * const name, const bool mode)
2307 {
2308 	bool result = false;
2309 	kindDefinition* const def = getLanguageKindForName (language, name);
2310 	if (def != NULL)
2311 	{
2312 		enableKind (def, mode);
2313 		result = true;
2314 	}
2315 	return result;
2316 }
2317 
processLangKindDefinition(const langType language,const char * const option,const char * const parameter)2318 static void processLangKindDefinition (
2319 		const langType language, const char *const option,
2320 		const char *const parameter)
2321 {
2322 	const char *p = parameter;
2323 	bool mode = true;
2324 	int c;
2325 	static vString *longName;
2326 	bool inLongName = false;
2327 	const char *k;
2328 	bool r;
2329 
2330 	Assert (0 <= language  &&  language < (int) LanguageCount);
2331 
2332 	initializeParser (language);
2333 	if (*p == '*')
2334 	{
2335 		resetLanguageKinds (language, true);
2336 		p++;
2337 	}
2338 	else if (*p != '+'  &&  *p != '-')
2339 		resetLanguageKinds (language, false);
2340 
2341 	longName = vStringNewOrClearWithAutoRelease (longName);
2342 
2343 	while ((c = *p++) != '\0')
2344 	{
2345 		switch (c)
2346 		{
2347 		case '+':
2348 			if (inLongName)
2349 				vStringPut (longName, c);
2350 			else
2351 				mode = true;
2352 			break;
2353 		case '-':
2354 			if (inLongName)
2355 				vStringPut (longName, c);
2356 			else
2357 				mode = false;
2358 			break;
2359 		case '{':
2360 			if (inLongName)
2361 				error(FATAL,
2362 				      "unexpected character in kind specification: \'%c\'",
2363 				      c);
2364 			inLongName = true;
2365 			break;
2366 		case '}':
2367 			if (!inLongName)
2368 				error(FATAL,
2369 				      "unexpected character in kind specification: \'%c\'",
2370 				      c);
2371 			k = vStringValue (longName);
2372 			r = enableLanguageKindForName (language, k, mode);
2373 			if (! r)
2374 				error (WARNING, "Unsupported kind: '%s' for --%s option",
2375 				       k, option);
2376 
2377 			inLongName = false;
2378 			vStringClear (longName);
2379 			break;
2380 		default:
2381 			if (inLongName)
2382 				vStringPut (longName, c);
2383 			else
2384 			{
2385 				r = enableLanguageKindForLetter (language, c, mode);
2386 				if (! r)
2387 					error (WARNING, "Unsupported kind: '%c' for --%s option",
2388 					       c, option);
2389 			}
2390 			break;
2391 		}
2392 	}
2393 }
2394 
/* Destructor passed to defineKind() for kinds created by --kinddef: the
 * name, the description and the definition itself are all heap-allocated. */
static void freeKdef (kindDefinition *kdef)
{
	/* members first, then the owner */
	eFree (kdef->name);
	eFree (kdef->description);

	eFree (kdef);
}
2401 
extractDescriptionAndFlags(const char * input,const char ** flags)2402 static char *extractDescriptionAndFlags(const char *input, const char **flags)
2403 {
2404 	vString *vdesc = vStringNew();
2405 	bool escaped = false;
2406 
2407 	if (flags)
2408 		*flags = NULL;
2409 
2410 	while (*input != '\0')
2411 	{
2412 		if (escaped)
2413 		{
2414 			vStringPut (vdesc, *input);
2415 			escaped = false;
2416 
2417 		}
2418 		else if (*input == '\\')
2419 			escaped = true;
2420 		else if (*input == LONG_FLAGS_OPEN)
2421 		{
2422 			if (flags)
2423 				*flags = input;
2424 			break;
2425 		}
2426 		else
2427 			vStringPut (vdesc, *input);
2428 		input++;
2429 	}
2430 	return vStringDeleteUnwrap(vdesc);
2431 }
2432 
pre_kind_def_flag_refonly_long(const char * const optflag,const char * const param,void * data)2433 static void pre_kind_def_flag_refonly_long (const char* const optflag,
2434 											const char* const param, void* data)
2435 {
2436 	kindDefinition *kdef = data;
2437 	kdef->referenceOnly = true;
2438 }
2439 
2440 static flagDefinition PreKindDefFlagDef [] = {
2441 	{ '\0', "_refonly", NULL, pre_kind_def_flag_refonly_long,
2442 	  NULL, "use this kind reference tags only"},
2443 };
2444 
processLangDefineKind(const langType language,const char * const option,const char * const parameter)2445 static bool processLangDefineKind(const langType language,
2446 								  const char *const option,
2447 								  const char *const parameter)
2448 {
2449 	parserObject *parser;
2450 
2451 	kindDefinition *kdef;
2452 	char letter;
2453 	const char * p = parameter;
2454 	char *name;
2455 	char *description;
2456 	const char *name_start;
2457 	const char *marker_end;
2458 	size_t name_len;
2459 	const char *flags;
2460 
2461 
2462 	Assert (0 <= language  &&  language < (int) LanguageCount);
2463 	parser = LanguageTable + language;
2464 
2465 	Assert (p);
2466 
2467 	if (p[0] == '\0')
2468 		error (FATAL, "no kind definition specified in \"--%s\" option", option);
2469 
2470 	letter = p[0];
2471 	if (letter == ',')
2472 		error (FATAL, "no kind letter specified in \"--%s\" option", option);
2473 	if (/* See #1697. isalnum expects 0~255 as the range of characters. */
2474 		!isalpha ((unsigned char)letter)
2475 		)
2476 		error (FATAL, "the kind letter given in \"--%s\" option is not an alphabet", option);
2477 	else if (letter == KIND_FILE_DEFAULT_LETTER)
2478 		error (FATAL, "the kind letter `%c' in \"--%s\" option is reserved for \"%s\" kind",
2479 			   KIND_FILE_DEFAULT_LETTER, option, KIND_FILE_DEFAULT_NAME);
2480 	else if (getKindForLetter (parser->kindControlBlock, letter))
2481 	{
2482 		error (WARNING, "the kind for letter `%c' specified in \"--%s\" option is already defined.",
2483 			   letter, option);
2484 		return true;
2485 	}
2486 
2487 	if (p[1] != ',')
2488 		error (FATAL, "wrong kind definition in \"--%s\" option: no comma after letter", option);
2489 
2490 	p += 2;
2491 	if (p[0] == '\0')
2492 		error (FATAL, "no kind name specified in \"--%s\" option", option);
2493 	marker_end = strchr (p, ',');
2494 	if (!marker_end)
2495 		error (FATAL, "no kind description specified in \"--%s\" option", option);
2496 
2497 	name_start = p;
2498 	while (p != marker_end)
2499 	{
2500 		if (p == name_start)
2501 		{
2502 			if (!isalpha(*p))
2503 			{
2504 				char *name_in_msg = eStrndup (name_start, marker_end - name_start);
2505 				error (FATAL,
2506 					   "a kind name doesn't start with an alphabetical character: "
2507 					   "'%s' in \"--%s\" option",
2508 					   name_in_msg, option);
2509 			}
2510 		}
2511 		else
2512 		{
2513 			if (!isalnum (*p))
2514 			{
2515 				char *name_in_msg = eStrndup (name_start, marker_end - name_start);
2516 				error (FATAL,
2517 					   "non-alphanumeric char is used as part of kind name: "
2518 					   "'%s' in \"--%s\" option",
2519 					   name_in_msg, option);
2520 			}
2521 		}
2522 		p++;
2523 	}
2524 
2525 	if (marker_end == name_start)
2526 		error (FATAL, "the kind name in \"--%s\" option is empty", option);
2527 
2528 	name_len = marker_end - name_start;
2529 	if (strncmp (name_start, KIND_FILE_DEFAULT_NAME, name_len) == 0)
2530 		error (FATAL,
2531 			   "the kind name " KIND_FILE_DEFAULT_NAME " in \"--%s\" option is reserved",
2532 			   option);
2533 
2534 	name = eStrndup (name_start, name_len);
2535 	if (getKindForName (parser->kindControlBlock, name))
2536 	{
2537 		error (WARNING, "the kind for name `%s' specified in \"--%s\" option is already defined.",
2538 			   name, option);
2539 		eFree (name);
2540 		return true;
2541 	}
2542 
2543 	p++;
2544 	if (p [0] == '\0' || p [0] == LONG_FLAGS_OPEN)
2545 		error (FATAL, "found an empty kind description in \"--%s\" option", option);
2546 
2547 	description = extractDescriptionAndFlags (p, &flags);
2548 
2549 	kdef = xCalloc (1, kindDefinition);
2550 	kdef->enabled = true;
2551 	kdef->letter = letter;
2552 	kdef->name = name;
2553 	kdef->description = description;
2554 	if (flags)
2555 		flagsEval (flags, PreKindDefFlagDef, ARRAY_SIZE (PreKindDefFlagDef), kdef);
2556 
2557 	defineKind (parser->kindControlBlock, kdef, freeKdef);
2558 	return true;
2559 }
2560 
/* Destructor passed to defineRole() for roles created by --_roledef: the
 * name, the description and the definition itself are all heap-allocated. */
static void freeRdef (roleDefinition *rdef)
{
	/* members first, then the owner */
	eFree (rdef->name);
	eFree (rdef->description);

	eFree (rdef);
}
2567 
processLangDefineRole(const langType language,const char * const kindSpec,const char * const option,const char * const parameter)2568 static bool processLangDefineRole(const langType language,
2569 								  const char *const kindSpec,
2570 								  const char *const option,
2571 								  const char *const parameter)
2572 {
2573 	parserObject *parser;
2574 
2575 	kindDefinition *kdef;
2576 	roleDefinition *rdef;
2577 	char *name;
2578 	char *description;
2579 
2580 	Assert (0 <= language  &&  language < (int) LanguageCount);
2581 	Assert (parameter);
2582 
2583 	parser = LanguageTable + language;
2584 
2585 	if (*kindSpec == '{')
2586 	{
2587 		const char *end = strchr (kindSpec, '}');
2588 		if (end == NULL)
2589 			error (FATAL, "no '}' representing the end of kind name in --%s option: %s",
2590 				   option, kindSpec);
2591 		if (*(end + 1) != '\0')
2592 			error (FATAL, "garbage after the kind specification %s in --%s option",
2593 				   kindSpec, option);
2594 		char *kindName = eStrndup (kindSpec + 1, end - (kindSpec + 1));
2595 		if (strcmp (kindName, KIND_FILE_DEFAULT_NAME) == 0)
2596 			error (FATAL, "don't define a role for %c/%s kind; it has no role: --%s",
2597 				   KIND_FILE_DEFAULT_LETTER, KIND_FILE_DEFAULT_NAME,
2598 				   option);
2599 		kdef = getKindForName (parser->kindControlBlock, kindName);
2600 		if (kdef == NULL)
2601 			error (FATAL, "the kind for name `%s' specified in \"--%s\" option is not defined.",
2602 				   kindName, option);
2603 		eFree (kindName);
2604 	}
2605 	else
2606 	{
2607 		char kletter = *kindSpec;
2608 		if (!isalnum ((unsigned char)kletter))
2609 			error (FATAL, "the kind letter given in \"--%s\" option is not an alphabet or a number", option);
2610 		else if (kletter == KIND_FILE_DEFAULT_LETTER)
2611 			error (FATAL, "the kind letter `%c' in \"--%s\" option is reserved for \"%s\" kind, and no role can be attached to it",
2612 				   KIND_FILE_DEFAULT_LETTER, option, KIND_FILE_DEFAULT_NAME);
2613 		else if (*(kindSpec + 1) != '\0')
2614 			error (FATAL, "more than one letters are specified as a kind spec in \"--%s\" option: use `{' and `}' for specifying a kind name",
2615 				   option);
2616 
2617 		kdef = getKindForLetter (parser->kindControlBlock, kletter);
2618 		if (kdef == NULL)
2619 		{
2620 			error (FATAL, "the kind for letter `%c' specified in \"--%s\" option is not defined.",
2621 				   *kindSpec, option);
2622 			return true;
2623 		}
2624 	}
2625 
2626 	const char * p = parameter;
2627 	const char *tmp_end = strchr (p, ',');
2628 	if (!tmp_end)
2629 		error (FATAL, "no role description specified in \"--%s\" option", option);
2630 
2631 	const char * tmp_start = p;
2632 	while (p != tmp_end)
2633 	{
2634 		if (!isalnum (*p))
2635 			error (FATAL, "unacceptable char as part of role name in \"--%s\" option: %c",
2636 				   option, *p);
2637 		p++;
2638 	}
2639 
2640 	if (tmp_end == tmp_start)
2641 		error (FATAL, "the role name in \"--%s\" option is empty", option);
2642 
2643 	name = eStrndup (tmp_start, tmp_end - tmp_start);
2644 	if (getRoleForName (parser->kindControlBlock, kdef->id, name))
2645 	{
2646 		error (WARNING, "the role for name `%s' specified in \"--%s\" option is already defined.",
2647 			   name, option);
2648 		eFree (name);
2649 		return true;
2650 	}
2651 
2652 	p++;
2653 	if (p [0] == '\0' || p [0] == LONG_FLAGS_OPEN)
2654 		error (FATAL, "found an empty role description in \"--%s\" option", option);
2655 
2656 	const char *flags;
2657 	description = extractDescriptionAndFlags (p, &flags);
2658 
2659 	rdef = xCalloc (1, roleDefinition);
2660 	rdef->enabled = true;
2661 	rdef->name = name;
2662 	rdef->description = description;
2663 
2664 	if (flags)
2665 		flagsEval (flags, NULL, 0, rdef);
2666 
2667 	defineRole (parser->kindControlBlock, kdef->id, rdef, freeRdef);
2668 
2669 	return true;
2670 }
2671 
processKinddefOption(const char * const option,const char * const parameter)2672 extern bool processKinddefOption (const char *const option, const char * const parameter)
2673 {
2674 	langType language;
2675 
2676 	language = getLanguageComponentInOption (option, "kinddef-");
2677 	if (language == LANG_IGNORE)
2678 		return false;
2679 
2680 	return processLangDefineKind (language, option, parameter);
2681 }
2682 
processRoledefOption(const char * const option,const char * const parameter)2683 extern bool processRoledefOption (const char *const option, const char * const parameter)
2684 {
2685 #define PREFIX "_roledef-"
2686 #define PREFIX_LEN strlen(PREFIX)
2687 
2688 	langType language = getLanguageComponentInOption (option, PREFIX);
2689 	if (language == LANG_IGNORE)
2690 		return false;
2691 
2692 	Assert (0 <= language  &&  language < (int) LanguageCount);
2693 	const char* kindSpec = option + PREFIX_LEN + strlen (getLanguageName (language));
2694 	if (*kindSpec == '\0')
2695 		error (FATAL, "no kind is specifined in \"--%s=%s\"", option, parameter);
2696 	if (*kindSpec != '.')
2697 		error (FATAL, "no delimiter (.) where a kindspec starts is found in \"--%s\": %c",
2698 			   option, *kindSpec);
2699 	kindSpec++;
2700 
2701 	return processLangDefineRole (language, kindSpec, option, parameter);
2702 #undef PREFIX
2703 #undef PREFIX_LEN
2704 }
2705 
/* Arguments of processLangKindDefinition() other than the language, bundled
 * so that they can travel through the user data pointer of
 * foreachLanguage(). */
struct langKindDefinitionStruct {
	const char *const option;     /* option name, used in diagnostics */
	const char *const parameter;  /* kind specification to apply */
};
/* foreachLanguage() callback: applies the kind specification carried in
 * USER_DATA (a struct langKindDefinitionStruct) to LANG. */
static void processLangKindDefinitionEach(
	langType lang, void* user_data)
{
	const struct langKindDefinitionStruct *const bundle = user_data;

	processLangKindDefinition (lang, bundle->option, bundle->parameter);
}
2716 
parameterEnablingAllOrFileKind(const char * const option,const char * const parameter,bool following_plus_or_minus_op)2717 static bool parameterEnablingAllOrFileKind (const char *const option,
2718 											const char *const parameter,
2719 											bool following_plus_or_minus_op)
2720 {
2721 	size_t file_long_flag_len = strlen(KIND_FILE_DEFAULT_NAME);
2722 
2723 	switch (parameter[0])
2724 	{
2725 	/* Though only '*' is documented as an acceptable kind spec for
2726 	 * --kinds-all option in our man page, we accept '\0' here because
2727 	 * it will be useful for testing purpose. */
2728 	case '\0':
2729 		if (following_plus_or_minus_op)
2730 			error(FATAL, "no kind specification after + (or -) in --%s option",
2731 				  option);
2732 		else
2733 			return true;
2734 	case '+':
2735 	case '-':
2736 		if (following_plus_or_minus_op)
2737 			error(FATAL, "don't repeat + (nor -) in --%s option",
2738 				  option);
2739 		else
2740 			return parameterEnablingAllOrFileKind (option, parameter + 1, true);
2741 	case KIND_WILDCARD_LETTER:
2742 		if (following_plus_or_minus_op)
2743 			error(FATAL, "don't use '*' after + (nor -) in --%s option",
2744 				  option);
2745 		else
2746 			return parameterEnablingAllOrFileKind (option, parameter + 1, false);
2747 	case KIND_FILE_DEFAULT_LETTER:
2748 		return parameterEnablingAllOrFileKind (option, parameter + 1, false);
2749 	case '{':
2750 		if (strncmp (parameter + 1, KIND_FILE_DEFAULT_NAME, file_long_flag_len) == 0
2751 			&& parameter [1 + file_long_flag_len] == '}')
2752 			return parameterEnablingAllOrFileKind (option,
2753 												   parameter + 1 + file_long_flag_len + 1,
2754 												   false);
2755 		break;
2756 	}
2757 	return false;
2758 }
2759 
processKindsOption(const char * const option,const char * const parameter)2760 extern bool processKindsOption (
2761 		const char *const option, const char *const parameter)
2762 {
2763 #define PREFIX "kinds-"
2764 #define PREFIX_LEN strlen(PREFIX)
2765 
2766 	bool handled = false;
2767 	struct langKindDefinitionStruct arg = {
2768 		.option = option,
2769 		.parameter = parameter,
2770 	};
2771 	langType language;
2772 
2773 	const char* const dash = strchr (option, '-');
2774 	if (dash != NULL  &&
2775 		(strcmp (dash + 1, "kinds") == 0  ||  strcmp (dash + 1, "types") == 0))
2776 	{
2777 		size_t len = dash - option;
2778 		char *langName = eStrndup (option, len);
2779 
2780 		if ((len == 3) && (strcmp (langName, RSV_LANG_ALL) == 0))
2781 		{
2782 			error (WARNING,
2783 				   "\"--%s\" option is obsolete; use \"--kinds-%s\" instead",
2784 				   option, langName);
2785 			if (!parameterEnablingAllOrFileKind (option, parameter, false))
2786 				error (FATAL, "only '*', 'F', \"{file}\" or their combination is acceptable as kind letter for --%s", option);
2787 			foreachLanguage(processLangKindDefinitionEach, &arg);
2788 		}
2789 		else
2790 		{
2791 			language = getNamedLanguage (langName, 0);
2792 			if (language == LANG_IGNORE)
2793 				error (WARNING, "Unknown language \"%s\" in \"%s\" option", langName, option);
2794 			else
2795 				processLangKindDefinition (language, option, parameter);
2796 		}
2797 		eFree (langName);
2798 		handled = true;
2799 	}
2800 	else if ( strncmp (option, PREFIX, PREFIX_LEN) == 0 )
2801 	{
2802 		const char* lang;
2803 
2804 		lang = option + PREFIX_LEN;
2805 		if (lang[0] == '\0')
2806 			error (WARNING, "No language given in \"%s\" option", option);
2807 		else if (strcmp (lang, RSV_LANG_ALL) == 0)
2808 		{
2809 			if (!parameterEnablingAllOrFileKind (option, parameter, false))
2810 				error (FATAL, "only '*', 'F', \"{file}\" or their combination is acceptable as kind letter for --%s", option);
2811 			foreachLanguage(processLangKindDefinitionEach, &arg);
2812 		}
2813 		else
2814 		{
2815 			language = getNamedLanguage (lang, 0);
2816 			if (language == LANG_IGNORE)
2817 				error (WARNING, "Unknown language \"%s\" in \"%s\" option", lang, option);
2818 			else
2819 				processLangKindDefinition (language, option, parameter);
2820 		}
2821 		handled = true;
2822 	}
2823 	return handled;
2824 #undef PREFIX
2825 #undef PREFIX_LEN
2826 }
2827 
2828 /*
2829  * The argument specification for --roles-<LANG>:<KIND>= option
2830  * =====================================================================
2831  *
2832  * --roles-all.*=
2833  * --roles-all=
2834  * => Disable all roles of all kinds in all languages.
2835  *
2836  * --roles-all.*='*'
2837  * --roles-all='*'
2838  * => Enable all roles of all kinds in all languages.
2839  *
2840  * --roles-<LANG>.*=
2841  * --roles-<LANG>=
2842  * => Disable all roles of all kinds.
2843  *
2844  * --roles-<LANG>.*=*
2845  * --roles-<LANG>=*
2846  * => Enable all roles of all kinds.
2847  *
2848  * --roles-<LANG>.{kind}=
2849  * --roles-<LANG>.k=
2850  * => Disable all roles of the kind specified with a letter.
2851  *
2852  * --roles-<LANG>.{kind}=*
2853  * --roles-<LANG>.k=*
2854  * => Enable all roles of the kind specified with a letter.
2855  *
2856  * --roles-<LANG>.{kind}=[+|-]{role}
2857  * --roles-<LANG>.k=[+|-]{role}
2858  * => Enable/disable the role of the kind specified with a letter.
2859  *
2860  *
2861  * Examples of combination
2862  * ---------------------------------------------------------------------
2863  *
2864  * --roles-<LANG>.k0=+{role0}-{role1}{role2}
2865  * --roles-<LANG>.{kind1}=+{role0}-{role1}{role2}
2866  *
2867  *
 * How --_roledef should be changed to align with the --roles-<LANG> notation
2869  * ---------------------------------------------------------------------
2870  *
2871  * --_roledef-<LANG>.k=role,description
2872  * --_roledef-<LANG>.{kind}=role,description
2873  *
2874  * The notation was
2875  *  --_roledef-<LANG>=k.role,description
2876  *
2877  *
 * How --param should be changed to align with the --roles-<LANG> notation
2879  * ---------------------------------------------------------------------
2880  *
2881  * --param-<LANG>.name=argument
2882  *
2883  * The notation was
2884  * --param-<LANG>:name=argument
2885  *
2886  *
 * How --_paramdef should be changed to align with the --roles-<LANG> notation
2888  * ---------------------------------------------------------------------
2889  *
 * --_paramdef-<LANG>.name=[ default (description) ]
2891  *
2892  *
 * Discussion: which should we use, `.' or `:'?
2894  * ---------------------------------------------------------------------
2895  *
2896  * `.' is better because `:' implies fields.
2897  *
2898  */
/* Argument bundle passed to processLangKindRoleDefinitionEach() through
 * its user_data pointer. */
struct langKindRoleDefinitionStruct {
	/* Kind to operate on; KIND_GHOST_INDEX makes
	 * processLangKindRoleDefinitionEach() act on every kind. */
	int kindIndex;
	/* Option name; printed after "--" in diagnostics. */
	const char *const option;
	/* Option argument, i.e. the role specification. */
	const char *const parameter;
};
2904 
/* Signature of the per-kind callback invoked by foreachKind(). */
typedef void (*kindCallback)  (langType language, int kindIndex, void* user_data);
/* Invoke CALLBACK once per kind index of LANGUAGE, in ascending order,
 * forwarding USER_DATA untouched. */
static void foreachKind(langType language, kindCallback callback, void *user_data)
{
	const unsigned int kindCount = countLanguageKinds (language);
	unsigned int kindIndex = 0;

	while (kindIndex < kindCount)
	{
		callback (language, (int)kindIndex, user_data);
		kindIndex++;
	}
}
2912 
resetKindRoles(const langType language,int kindIndex,const bool mode)2913 static void resetKindRoles (const langType language, int kindIndex, const bool mode)
2914 {
2915 	Assert (0 <= language  &&  language < (int) LanguageCount);
2916 	unsigned int c = countLanguageRoles (language, kindIndex);
2917 
2918 	for (unsigned int i = 0; i < c; i++)
2919 	{
2920 		roleDefinition* rdef = getLanguageRole (language, kindIndex, (int)i);
2921 		enableRole (rdef, mode);
2922 	}
2923 }
2924 
/* kindCallback adapter for resetKindRoles(): the boolean mode travels in
 * the USER_DATA pointer value itself (non-NULL means "enable"). */
static void resetKindRolesAsCallback (const langType language, int kindIndex, void *user_data)
{
	resetKindRoles (language, kindIndex, user_data != NULL);
}
2930 
processLangKindRoleDefinition(const langType language,const int kindIndex,const char * const option,const char * const parameter)2931 static void processLangKindRoleDefinition (
2932 		const langType language, const int kindIndex, const char *const option,
2933 		const char *const parameter)
2934 {
2935 	Assert (0 <= language  &&  language < (int) LanguageCount);
2936 	Assert (kindIndex != KIND_GHOST_INDEX);
2937 	initializeParser (language);
2938 
2939 	const char *p = parameter;
2940 	bool mode = true;
2941 
2942 	if (*p == '\0')
2943 	{
2944 		resetKindRoles (language, kindIndex, false);
2945 		return;
2946 	}
2947 	else if (*p != '+' && *p != '-' )
2948 		resetKindRoles (language, kindIndex, false);
2949 
2950 	while (*p != '\0')
2951 	{
2952 		if (*p == '+')
2953 		{
2954 			mode = true;
2955 			p++;
2956 		}
2957 		else if (*p == '-')
2958 		{
2959 			mode = false;
2960 			p++;
2961 		}
2962 		else if (*p == '{')
2963 		{
2964 			p++;
2965 			char *q = strchr (p, '}');
2966 			if (!q)
2967 
2968 				error (FATAL, "no '}' representing the end of role name in --%s option: %s",
2969 					   option, p);
2970 			if (p == q)
2971 				error (FATAL, "empty role for the kind letter: %c",
2972 					   getLanguageKind (language, kindIndex)->letter);
2973 
2974 			char *rname = eStrndup (p,  q - p);
2975 			roleDefinition *rdef = getLanguageRoleForName (language, kindIndex, rname);
2976 			if (!rdef)
2977 				error (WARNING, "no such role: %s of %c kind in language %s",
2978 					   rname, getLanguageKind (language, kindIndex)->letter,
2979 					   getLanguageName (language));
2980 			else
2981 				enableRole (rdef, mode);
2982 			eFree (rname);
2983 			p = q + 1;
2984 		}
2985 		else if (*p == '*')
2986 		{
2987 			resetKindRoles (language, kindIndex, true);
2988 			p++;
2989 		}
2990 		else
2991 			error (FATAL, "unexpected character %c in --%s=%s option",
2992 				   *p, option, parameter);
2993 	}
2994 }
2995 
/* Per-language worker for --roles-all...: USER_DATA points to a
 * struct langKindRoleDefinitionStruct.  With a concrete kind index the
 * request is forwarded to processLangKindRoleDefinition (); with
 * KIND_GHOST_INDEX every role of every kind of LANGUAGE is switched on
 * (parameter starting with '*') or off (anything else). */
static void processLangKindRoleDefinitionEach (langType language, void* user_data)
{
	const struct langKindRoleDefinitionStruct *const spec = user_data;

	if (spec->kindIndex != KIND_GHOST_INDEX)
	{
		processLangKindRoleDefinition (language, spec->kindIndex,
									   spec->option, spec->parameter);
		return;
	}

	initializeParser (language);

	void *const mode = (spec->parameter[0] == '*')? (void *)true: (void *)false;
	foreachKind (language, resetKindRolesAsCallback, mode);
}
3010 
processRolesOption(const char * const option,const char * const parameter)3011 extern bool processRolesOption (const char *const option, const char *const parameter)
3012 {
3013 #define PREFIX "roles-"
3014 #define PREFIX_LEN strlen(PREFIX)
3015 
3016 	if ( strncmp (option, PREFIX, PREFIX_LEN) != 0 )
3017 		return false;
3018 
3019 	const char* lang = option + PREFIX_LEN;
3020 	if (lang[0] == '\0')
3021 	{
3022 		error (WARNING, "no language given in \"%s\" option", option);
3023 		return true;
3024 	}
3025 
3026 	/*
3027 	 * --roles-all.*=
3028 	 * --roles-all=
3029 	 * => Disable all roles of all kinds in all languages.
3030 	 *
3031 	 * --roles-all.*='*'
3032 	 * --roles-all='*'
3033 	 * => Enable all roles of all kinds in all languages.
3034 	 */
3035 	if (strncmp (lang, RSV_LANG_ALL, strlen(RSV_LANG_ALL)) == 0)
3036 	{
3037 		if (lang [strlen (RSV_LANG_ALL)] == '\0'
3038 			|| (strcmp (lang + strlen (RSV_LANG_ALL), ".*") == 0))
3039 		{
3040 			if (*parameter == '\0'
3041 				|| strcmp(parameter, "*") == 0)
3042 			{
3043 				struct langKindRoleDefinitionStruct arg = {
3044 					.kindIndex = KIND_GHOST_INDEX,
3045 					.option = option,
3046 					.parameter = parameter,
3047 				};
3048 				foreachLanguage (processLangKindRoleDefinitionEach, &arg);
3049 				return true;
3050 			}
3051 			else
3052 				error (FATAL, "only '*' or '' (empty string) is acceptable as an argument for --%s: %s",
3053 					   option,
3054 					   parameter);
3055 		}
3056 		else if (lang [strlen(RSV_LANG_ALL)] == '.')
3057 			error (FATAL, "only '*' or '' (empty string) is acceptable as a kind spec for --%sall: --%s",
3058 				   PREFIX,
3059 				   option);
3060 	}
3061 
3062 	/* Decide the language. */
3063 	langType language;
3064 	const char *dot = strchr (lang, '.');
3065 	if (dot)
3066 		language = getNamedLanguage (lang, dot - lang);
3067 	else
3068 		language = getNamedLanguage (lang, 0);
3069 
3070 	if (language == LANG_IGNORE)
3071 	{
3072 		char *lang0 = dot? eStrndup (lang, dot - lang): NULL;
3073 		error (WARNING, "unknown language \"%s\" in --%s option",
3074 			   (lang0? lang0: lang), option);
3075 		if (lang0)
3076 			eFree (lang0);
3077 		return true;
3078 	}
3079 
3080 	/*
3081 	 * --roles-<LANG>.*=
3082 	 * --roles-<LANG>=
3083 	 * => Disable all roles of all kinds.
3084 	 *
3085 	 * --roles-<LANG>.*=*
3086 	 * --roles-<LANG>=*
3087 	 * => Enable all roles of all kinds.
3088 	 */
3089 	if (dot == NULL || (strcmp (dot, ".*") == 0))
3090 	{
3091 		if (*parameter == '\0'
3092 			|| strcmp(parameter, "*") == 0)
3093 		{
3094 			foreachKind (language, resetKindRolesAsCallback,
3095 						 ((*parameter == '*')? (void*)true: (void*)false));
3096 			return true;
3097 		}
3098 		else
3099 			error (FATAL, "only '*' or '' (empty string) is acceptable as an argument for --%s: %s",
3100 				   option,
3101 				   parameter);
3102 	}
3103 
3104 	/* Decide the kind of the language. */
3105 	parserObject *parser = LanguageTable + language;
3106 	int kindIndex = KIND_GHOST_INDEX;
3107 	const char *kind = dot + 1;
3108 	if (*kind == '{')
3109 	{
3110 		const char *name_end = strchr (kind, '}');
3111 		if (name_end == NULL)
3112 			error (FATAL, "no '}' representing the end of kind name in --%s option: %s",
3113 				   option, kind);
3114 		char *kindName = eStrndup (kind + 1, name_end - (kind + 1));
3115 		if (strcmp (kindName, KIND_FILE_DEFAULT_NAME) == 0)
3116 		{
3117 			error (WARNING, "don't enable/disable a role in %c/%s kind; it has no role: --%s",
3118 				   KIND_FILE_DEFAULT_LETTER, KIND_FILE_DEFAULT_NAME,
3119 				   option);
3120 			return true;
3121 		}
3122 		kindIndex = getKindIndexForName (parser->kindControlBlock, kindName);
3123 		if (kindIndex == KIND_GHOST_INDEX)
3124 		{
3125 			eFree (kindName);
3126 			error (WARNING, "no such kind name as specified in --%s option", option);
3127 			return true;
3128 		}
3129 		if (*(name_end + 1) != '\0')
3130 			error (FATAL, "garbage after the kind specification {%s} in --%s option",
3131 				   kindName, option);
3132 		eFree (kindName);
3133 	}
3134 	else if (isalpha ((unsigned char)*kind))
3135 	{
3136 		if (*kind == KIND_FILE_DEFAULT_LETTER)
3137 		{
3138 			error (WARNING, "don't enable/disable a role in %c/%s kind; it has no role: --%s",
3139 				   KIND_FILE_DEFAULT_LETTER, KIND_FILE_DEFAULT_NAME,
3140 				   option);
3141 			return true;
3142 		}
3143 		kindIndex = getKindIndexForLetter (parser->kindControlBlock, *kind);
3144 		if (kindIndex == KIND_GHOST_INDEX)
3145 		{
3146 			error (WARNING, "no such kind letter as specified in --%s option", option);
3147 			return true;
3148 		}
3149 		if (*(kind + 1) != '\0')
3150 			error (FATAL, "garbage after the kind specification '%c' in --%s option",
3151 				   *kind, option);
3152 	}
3153 	else
3154 		error (FATAL, "'%c', unexpected character in --%s", *kind, option);
3155 
3156 
3157 	/*
3158 	 * --roles-<LANG>.{kind}=
3159 	 * --roles-<LANG>.k=
3160 	 * => Disable all roles of the kind specified with a letter.
3161 	 *
3162 	 * --roles-<LANG>.{kind}=*
3163 	 * --roles-<LANG>.k=*
3164 	 * => Enable all roles of the kind specified with a letter.
3165 	 *
3166 	 * --roles-<LANG>.{kind}=[+|-|]{role}
3167 	 * --roles-<LANG>.k=[+|-|]{role}
3168 	 * => Enable/disable the role of the kind specified with a letter.
3169 	 */
3170 	processLangKindRoleDefinition (language, kindIndex, option, parameter);
3171 
3172 	return true;
3173 #undef PREFIX
3174 #undef PREFIX_LEN
3175 }
3176 
printLanguageRoles(const langType language,const char * kindspecs,bool withListHeader,bool machinable,FILE * fp)3177 extern void printLanguageRoles (const langType language, const char* kindspecs,
3178 								bool withListHeader, bool machinable, FILE *fp)
3179 {
3180 	struct colprintTable *table = roleColprintTableNew();
3181 	parserObject *parser;
3182 
3183 	initializeParser (language);
3184 
3185 	if (language == LANG_AUTO)
3186 	{
3187 		for (unsigned int i = 0  ;  i < LanguageCount  ;  ++i)
3188 		{
3189 			if (!isLanguageVisible (i))
3190 				continue;
3191 
3192 			parser = LanguageTable + i;
3193 			roleColprintAddRoles (table, parser->kindControlBlock, kindspecs);
3194 		}
3195 	}
3196 	else
3197 	{
3198 		parser = LanguageTable + language;
3199 		roleColprintAddRoles (table, parser->kindControlBlock, kindspecs);
3200 	}
3201 
3202 	roleColprintTablePrint (table, (language != LANG_AUTO),
3203 							withListHeader, machinable, fp);
3204 	colprintTableDelete (table);
3205 }
3206 
/* Emit the kinds of LANGUAGE: append them to TABLE when one is given,
 * otherwise print each kind directly via printKind () with INDENT. */
static void printKinds (langType language, bool indent,
						struct colprintTable * table)
{
	Assert (0 <= language  &&  language < (int) LanguageCount);

	initializeParser (language);

	struct kindControlBlock *const kcb = LanguageTable [language].kindControlBlock;

	if (table)
	{
		kindColprintAddLanguageLines (table, kcb);
		return;
	}

	for (unsigned int i = 0; i < countKinds (kcb); ++i)
		printKind (getKind (kcb, i), indent);
}
3226 
printLanguageKinds(const langType language,bool allKindFields,bool withListHeader,bool machinable,FILE * fp)3227 extern void printLanguageKinds (const langType language, bool allKindFields,
3228 								bool withListHeader, bool machinable, FILE *fp)
3229 {
3230 	struct colprintTable * table = NULL;
3231 
3232 	if (allKindFields)
3233 		table = kindColprintTableNew ();
3234 
3235 	if (language == LANG_AUTO)
3236 	{
3237 		for (unsigned int i = 0  ;  i < LanguageCount  ;  ++i)
3238 		{
3239 			const parserDefinition* const lang = LanguageTable [i].def;
3240 
3241 			if (lang->invisible)
3242 				continue;
3243 
3244 			if (!table)
3245 				printf ("%s%s\n", lang->name, isLanguageEnabled (i) ? "" : " [disabled]");
3246 			printKinds (i, true, table);
3247 		}
3248 	}
3249 	else
3250 		printKinds (language, false, table);
3251 
3252 	if (allKindFields)
3253 	{
3254 		kindColprintTablePrint(table, (language == LANG_AUTO)? 0: 1,
3255 							   withListHeader, machinable, fp);
3256 		colprintTableDelete (table);
3257 	}
3258 }
3259 
/* Append one line per parameter handler of LANGUAGE to TABLE. */
static void printParameters (struct colprintTable *table, langType language)
{
	Assert (0 <= language  &&  language < (int) LanguageCount);

	initializeParser (language);

	const parserDefinition *const def = LanguageTable [language].def;
	if (def->parameterHandlerTable == NULL)
		return;

	for (unsigned int i = 0; i < def->parameterHandlerCount; ++i)
		paramColprintAddParameter(table, language, &def->parameterHandlerTable [i]);
}
3274 
printLanguageParameters(const langType language,bool withListHeader,bool machinable,FILE * fp)3275 extern void printLanguageParameters (const langType language,
3276 									 bool withListHeader, bool machinable, FILE *fp)
3277 {
3278 	struct colprintTable *table =  paramColprintTableNew();
3279 
3280 	if (language == LANG_AUTO)
3281 	{
3282 		for (unsigned int i = 0; i < LanguageCount ; ++i)
3283 		{
3284 			const parserDefinition* const lang = LanguageTable [i].def;
3285 
3286 			if (lang->invisible)
3287 				continue;
3288 
3289 			printParameters (table, i);
3290 		}
3291 	}
3292 	else
3293 		printParameters (table, language);
3294 
3295 	paramColprintTablePrint (table, (language != LANG_AUTO),
3296 							 withListHeader, machinable, fp);
3297 	colprintTableDelete (table);
3298 }
3299 
processLangAliasOption(const langType language,const char * const parameter)3300 static void processLangAliasOption (const langType language,
3301 				    const char *const parameter)
3302 {
3303 	const char* alias;
3304 	const parserObject * parser;
3305 
3306 	Assert (0 <= language  &&  language < (int) LanguageCount);
3307 	parser = LanguageTable + language;
3308 
3309 	if (parameter[0] == '\0')
3310 	{
3311 		clearLanguageAliases (language);
3312 		verbose ("clear aliases for %s\n", parser->def->name);
3313 	}
3314 	else if (strcmp (parameter, RSV_LANGMAP_DEFAULT) == 0)
3315 	{
3316 		installLanguageAliasesDefault (language);
3317 		verbose ("reset aliases for %s\n", parser->def->name);
3318 	}
3319 	else if (parameter[0] == '+')
3320 	{
3321 		alias = parameter + 1;
3322 		addLanguageAlias(language, alias);
3323 		verbose ("add an alias %s to %s\n", alias, parser->def->name);
3324 	}
3325 	else if (parameter[0] == '-')
3326 	{
3327 		if (parser->currentAliases)
3328 		{
3329 			alias = parameter + 1;
3330 			if (stringListDeleteItemExtension (parser->currentAliases, alias))
3331 			{
3332 				verbose ("remove an alias %s from %s\n", alias, parser->def->name);
3333 			}
3334 		}
3335 	}
3336 	else
3337 	{
3338 		alias = parameter;
3339 		clearLanguageAliases (language);
3340 		addLanguageAlias(language, alias);
3341 		verbose ("set alias %s to %s\n", alias, parser->def->name);
3342 	}
3343 
3344 }
3345 
processAliasOption(const char * const option,const char * const parameter)3346 extern bool processAliasOption (
3347 		const char *const option, const char *const parameter)
3348 {
3349 	langType language;
3350 
3351 	Assert (parameter);
3352 
3353 #define PREFIX "alias-"
3354 	if (strcmp (option, "alias-" RSV_LANG_ALL) == 0)
3355 	{
3356 		if ((parameter[0] == '\0')
3357 			|| (strcmp (parameter, RSV_LANGMAP_DEFAULT) == 0))
3358 		{
3359 			for (unsigned int i = 0; i < LanguageCount; i++)
3360 			{
3361 				clearLanguageAliases (i);
3362 				verbose ("clear aliases for %s\n", getLanguageName(i));
3363 			}
3364 
3365 			if (parameter[0] != '\0')
3366 			{
3367 				verbose ("  Installing default language aliases:\n");
3368 				installLanguageAliasesDefaults ();
3369 			}
3370 		}
3371 		else
3372 		{
3373 			error (WARNING, "Use \"%s\" option for reset (\"default\") or clearing (\"\")", option);
3374 			return false;
3375 		}
3376 		return true;
3377 	}
3378 
3379 	language = getLanguageComponentInOption (option, "alias-");
3380 	if (language == LANG_IGNORE)
3381 		return false;
3382 #undef PREFIX
3383 
3384 	processLangAliasOption (language, parameter);
3385 	return true;
3386 }
3387 
/* Print one line for LANGUAGE on stdout: its name followed by its file
 * name patterns and/or extensions (as "*.ext"), as selected by TYPE. */
static void printMaps (const langType language, langmapType type)
{
	const parserObject *const parser = LanguageTable + language;
	stringList *const patterns = parser->currentPatterns;
	stringList *const extensions = parser->currentExtensions;
	unsigned int n;

	printf ("%-8s", parser->def->name);

	if ((type & LMAP_PATTERN) && patterns != NULL)
	{
		for (n = 0; n < stringListCount (patterns); ++n)
			printf (" %s", vStringValue (stringListItem (patterns, n)));
	}

	if ((type & LMAP_EXTENSION) && extensions != NULL)
	{
		for (n = 0; n < stringListCount (extensions); ++n)
			printf (" *.%s", vStringValue (stringListItem (extensions, n)));
	}

	putchar ('\n');
}
3405 
/* Create the table used for listing language maps.  When every bit of
 * LMAP_ALL is set in TYPE the table gets a TYPE column next to the MAP
 * column; otherwise it has a single PATTERN or EXTENSION column. */
static struct colprintTable *mapColprintTableNew (langmapType type)
{
	if ((type & LMAP_ALL) == LMAP_ALL)
		return colprintTableNew ("L:LANGUAGE", "L:TYPE", "L:MAP", NULL);

	if (type & LMAP_PATTERN)
		return colprintTableNew ("L:LANGUAGE", "L:PATTERN", NULL);

	if (type & LMAP_EXTENSION)
		return colprintTableNew ("L:LANGUAGE", "L:EXTENSION", NULL);

	AssertNotReached ();
	return NULL;
}
3420 
/* Append the file name patterns and/or extensions of PARSER to TABLE,
 * one line per map, as selected by TYPE.
 *
 * When both LMAP_PATTERN and LMAP_EXTENSION are requested, each line
 * carries an extra column ("pattern" or "extension") between the
 * language name and the map.
 *
 * A list that is NULL is treated as empty, matching the guards in
 * printMaps () and aliasColprintAddLanguage (). */
static void mapColprintAddLanguage (struct colprintTable * table,
									langmapType type,
									const parserObject* parser)
{
	struct colprintLine * line;
	unsigned int count;
	unsigned int i;

	if ((type & LMAP_PATTERN)
		&& parser->currentPatterns != NULL
		&& (0 < (count = stringListCount (parser->currentPatterns))))
	{
		for (i = 0; i < count; i++)
		{
			line = colprintTableGetNewLine (table);
			vString *pattern = stringListItem (parser->currentPatterns, i);

			colprintLineAppendColumnCString (line, parser->def->name);
			/* The type column exists only in the combined listing. */
			if (type & LMAP_EXTENSION)
				colprintLineAppendColumnCString (line, "pattern");
			colprintLineAppendColumnVString (line, pattern);
		}
	}

	if ((type & LMAP_EXTENSION)
		&& parser->currentExtensions != NULL
		&& (0 < (count = stringListCount (parser->currentExtensions))))
	{
		for (i = 0; i < count; i++)
		{
			line = colprintTableGetNewLine (table);
			vString *extension = stringListItem (parser->currentExtensions, i);

			colprintLineAppendColumnCString (line, parser->def->name);
			/* The type column exists only in the combined listing. */
			if (type & LMAP_PATTERN)
				colprintLineAppendColumnCString (line, "extension");
			colprintLineAppendColumnVString (line, extension);
		}
	}
}
3457 
/* List the language maps selected by TYPE for LANGUAGE, or for every
 * visible language when LANGUAGE is LANG_AUTO.  With LMAP_TABLE_OUTPUT
 * set in TYPE the maps are collected in a table printed to FP;
 * otherwise each language is printed by printMaps (). */
extern void printLanguageMaps (const langType language, langmapType type,
							   bool withListHeader, bool machinable, FILE *fp)
{
	/* DON'T SORT THE LIST

	   The order of listing should be equal to the order of matching
	   for the parser selection. */

	const bool allLanguages = (language == LANG_AUTO);
	struct colprintTable *table = NULL;

	if (type & LMAP_TABLE_OUTPUT)
		table = mapColprintTableNew(type);

	if (!allLanguages)
	{
		Assert (0 <= language  &&  language < (int) LanguageCount);

		if (type & LMAP_TABLE_OUTPUT)
			mapColprintAddLanguage (table, type, LanguageTable + language);
		else
			printMaps (language, type);
	}
	else
	{
		unsigned int i;

		for (i = 0; i < LanguageCount; ++i)
		{
			if (!isLanguageVisible (i))
				continue;

			if (type & LMAP_TABLE_OUTPUT)
				mapColprintAddLanguage (table, type, LanguageTable + i);
			else
				printMaps (i, type);
		}
	}

	if (type & LMAP_TABLE_OUTPUT)
	{
		colprintTablePrint (table, allLanguages? 0: 1,
							withListHeader, machinable, fp);
		colprintTableDelete (table);
	}
}
3509 
/* Create the two-column (LANGUAGE, ALIAS) table used for listing
 * language aliases. */
static struct colprintTable *aliasColprintTableNew (void)
{
	struct colprintTable *const table =
		colprintTableNew ("L:LANGUAGE", "L:ALIAS", NULL);

	return table;
}
3514 
aliasColprintAddLanguage(struct colprintTable * table,const parserObject * parser)3515 static void aliasColprintAddLanguage (struct colprintTable * table,
3516 									  const parserObject* parser)
3517 {
3518 	unsigned int count;
3519 
3520 	if (parser->currentAliases && (0 < (count = stringListCount (parser->currentAliases))))
3521 	{
3522 		for (unsigned int i = 0; i < count; i++)
3523 		{
3524 			struct colprintLine * line = colprintTableGetNewLine (table);
3525 			vString *alias = stringListItem (parser->currentAliases, i);;
3526 
3527 			colprintLineAppendColumnCString (line, parser->def->name);
3528 			colprintLineAppendColumnVString (line, alias);
3529 		}
3530 	}
3531 }
3532 
printLanguageAliases(const langType language,bool withListHeader,bool machinable,FILE * fp)3533 extern void printLanguageAliases (const langType language,
3534 								  bool withListHeader, bool machinable, FILE *fp)
3535 {
3536 	/* DON'T SORT THE LIST
3537 
3538 	   The order of listing should be equal to the order of matching
3539 	   for the parser selection. */
3540 
3541 	struct colprintTable * table = aliasColprintTableNew();
3542 	const parserObject* parser;
3543 
3544 	if (language == LANG_AUTO)
3545 	{
3546 		for (unsigned int i = 0; i < LanguageCount; ++i)
3547 		{
3548 			parser = LanguageTable + i;
3549 			if (parser->def->invisible)
3550 				continue;
3551 
3552 			aliasColprintAddLanguage (table, parser);
3553 		}
3554 	}
3555 	else
3556 	{
3557 		Assert (0 <= language  &&  language < (int) LanguageCount);
3558 		parser = LanguageTable + language;
3559 		aliasColprintAddLanguage (table, parser);
3560 	}
3561 
3562 	colprintTablePrint (table, (language == LANG_AUTO)? 0: 1,
3563 						withListHeader, machinable, fp);
3564 	colprintTableDelete (table);
3565 }
3566 
/* Print the name of the LANGUAGE-th entry of LTABLE on stdout, tagged
 * " [disabled]" when that language is not enabled.  Invisible entries
 * are skipped. */
static void printLanguage (const langType language, parserDefinition** ltable)
{
	Assert (0 <= language  &&  language < (int) LanguageCount);

	const parserDefinition *const def = ltable [language];

	if (!def->invisible)
		printf ("%s%s\n", def->name, isLanguageEnabled (def->id) ? "" : " [disabled]");
}
3578 
printLanguageList(void)3579 extern void printLanguageList (void)
3580 {
3581 	unsigned int i;
3582 	parserDefinition **ltable;
3583 
3584 	ltable = xMalloc (LanguageCount, parserDefinition*);
3585 	for (i = 0 ; i < LanguageCount ; ++i)
3586 		ltable[i] = LanguageTable[i].def;
3587 	qsort (ltable, LanguageCount, sizeof (parserDefinition*), compareParsersByName);
3588 
3589 	for (i = 0  ;  i < LanguageCount  ;  ++i)
3590 		printLanguage (i, ltable);
3591 
3592 	eFree (ltable);
3593 }
3594 
/* Release XDEF together with its name and description strings. */
static void xtagDefinitionDestroy (xtagDefinition *xdef)
{
	void *const name = (void *)xdef->name;
	void *const description = (void *)xdef->description;

	eFree (name);
	eFree (description);
	eFree (xdef);
}
3601 
processLangDefineExtra(const langType language,const char * const option,const char * const parameter)3602 static bool processLangDefineExtra (const langType language,
3603 									const char *const option,
3604 									const char *const parameter)
3605 {
3606 	xtagDefinition *xdef;
3607 	const char * p = parameter;
3608 	const char *name_end;
3609 	const char *desc;
3610 	const char *flags;
3611 
3612 	Assert (0 <= language  &&  language < (int) LanguageCount);
3613 	Assert (p);
3614 
3615 	if (p[0] == '\0')
3616 		error (FATAL, "no extra definition specified in \"--%s\" option", option);
3617 
3618 	name_end = strchr (p, ',');
3619 	if (!name_end)
3620 		error (FATAL, "no extra description specified in \"--%s\" option", option);
3621 	else if (name_end == p)
3622 		error (FATAL, "the extra name in \"--%s\" option is empty", option);
3623 
3624 	for (; p < name_end; p++)
3625 	{
3626 		if (!isalnum (*p))
3627 			error (FATAL, "unacceptable char as part of extra name in \"--%s\" option",
3628 				   option);
3629 	}
3630 
3631 	p++;
3632 	if (p [0] == '\0' || p [0] == LONG_FLAGS_OPEN)
3633 		error (FATAL, "extra description in \"--%s\" option is empty", option);
3634 
3635 	desc = extractDescriptionAndFlags (p, &flags);
3636 
3637 	xdef = xCalloc (1, xtagDefinition);
3638 	xdef->enabled = false;
3639 	xdef->letter = NUL_XTAG_LETTER;
3640 	xdef->name = eStrndup (parameter, name_end - parameter);
3641 	xdef->description = desc;
3642 	xdef->isEnabled = NULL;
3643 	DEFAULT_TRASH_BOX(xdef, xtagDefinitionDestroy);
3644 
3645 	if (flags)
3646 		flagsEval (flags, NULL, 0, xdef);
3647 
3648 	defineXtag (xdef, language);
3649 
3650 	return true;
3651 }
3652 
processExtradefOption(const char * const option,const char * const parameter)3653 extern bool processExtradefOption (const char *const option, const char *const parameter)
3654 {
3655 	langType language;
3656 
3657 	language = getLanguageComponentInOption (option, "_" "extradef-");
3658 	if (language == LANG_IGNORE)
3659 		return false;
3660 
3661 	return processLangDefineExtra (language, option, parameter);
3662 }
3663 
/* Release FDEF together with its description and name strings. */
static void fieldDefinitionDestroy (fieldDefinition *fdef)
{
	void *const description = (void *)fdef->description;
	void *const name = (void *)fdef->name;

	eFree (description);
	eFree (name);
	eFree (fdef);
}
3670 
processLangDefineField(const langType language,const char * const option,const char * const parameter)3671 static bool processLangDefineField (const langType language,
3672 									const char *const option,
3673 									const char *const parameter)
3674 {
3675 	fieldDefinition *fdef;
3676 	const char * p = parameter;
3677 	const char *name_end;
3678 	const char *desc;
3679 	const char *flags;
3680 
3681 	Assert (0 <= language  &&  language < (int) LanguageCount);
3682 	Assert (p);
3683 
3684 	if (p[0] == '\0')
3685 		error (FATAL, "no field definition specified in \"--%s\" option", option);
3686 
3687 	name_end = strchr (p, ',');
3688 	if (!name_end)
3689 		error (FATAL, "no field description specified in \"--%s\" option", option);
3690 	else if (name_end == p)
3691 		error (FATAL, "the field name in \"--%s\" option is empty", option);
3692 
3693 	for (; p < name_end; p++)
3694 	{
3695 		if (!isalpha (*p))
3696 			error (FATAL, "unacceptable char as part of field name in \"--%s\" option",
3697 				   option);
3698 	}
3699 
3700 	p++;
3701 	if (p [0] == '\0' || p [0] == LONG_FLAGS_OPEN)
3702 		error (FATAL, "field description in \"--%s\" option is empty", option);
3703 
3704 	desc = extractDescriptionAndFlags (p, &flags);
3705 
3706 	fdef = xCalloc (1, fieldDefinition);
3707 	fdef->enabled = false;
3708 	fdef->letter = NUL_FIELD_LETTER;
3709 	fdef->name = eStrndup(parameter, name_end - parameter);
3710 	fdef->description = desc;
3711 	fdef->isValueAvailable = NULL;
3712 	fdef->getValueObject = NULL;
3713 	fdef->getterValueType = NULL;
3714 	fdef->setValueObject = NULL;
3715 	fdef->setterValueType = NULL;
3716 	fdef->checkValueForSetter = NULL;
3717 	fdef->dataType = FIELDTYPE_STRING; /* TODO */
3718 	fdef->ftype = FIELD_UNKNOWN;
3719 	DEFAULT_TRASH_BOX(fdef, fieldDefinitionDestroy);
3720 
3721 	if (flags)
3722 		flagsEval (flags, NULL, 0, fdef);
3723 
3724 	defineField (fdef, language);
3725 
3726 	return true;
3727 }
3728 
processFielddefOption(const char * const option,const char * const parameter)3729 extern bool processFielddefOption (const char *const option, const char *const parameter)
3730 {
3731 	langType language;
3732 
3733 	language = getLanguageComponentInOption (option, "_fielddef-");
3734 	if (language == LANG_IGNORE)
3735 		return false;
3736 
3737 	return processLangDefineField (language, option, parameter);
3738 }
3739 
3740 /*
3741 *   File parsing
3742 */
3743 
createTagsForFile(const langType language,const unsigned int passCount)3744 static rescanReason createTagsForFile (const langType language,
3745 				       const unsigned int passCount)
3746 {
3747 	parserDefinition *const lang = LanguageTable [language].def;
3748 	rescanReason rescan = RESCAN_NONE;
3749 
3750 	resetInputFile (language);
3751 
3752 	Assert (lang->parser || lang->parser2);
3753 
3754 	notifyInputStart ();
3755 
3756 	if (lang->parser != NULL)
3757 		lang->parser ();
3758 	else if (lang->parser2 != NULL)
3759 		rescan = lang->parser2 (passCount);
3760 
3761 	notifyInputEnd ();
3762 
3763 	return rescan;
3764 }
3765 
/* Notify the regex control block of LANGUAGE that input starts, then do
 * the same, recursively, for each language named by a
 * DEPTYPE_FOREIGNER dependency of LANGUAGE. */
extern void notifyLanguageRegexInputStart (langType language)
{
	parserObject *const self = LanguageTable + language;
	parserDefinition *const def = self->def;
	unsigned int n;

	notifyRegexInputStart(self->lregexControlBlock);

	for (n = 0; n < def->dependencyCount; n++)
	{
		parserDependency *dep = &def->dependencies [n];

		if (dep->type == DEPTYPE_FOREIGNER)
		{
			const langType foreigner = getNamedLanguage (dep->upperParser, 0);

			if (foreigner != LANG_IGNORE)
				notifyLanguageRegexInputStart (foreigner);
		}
	}
}
3784 
/* Counterpart of notifyLanguageRegexInputStart (): first notify,
 * recursively, each language named by a DEPTYPE_FOREIGNER dependency of
 * LANGUAGE that input ended, then the regex control block of LANGUAGE
 * itself. */
extern void notifyLanguageRegexInputEnd (langType language)
{
	parserObject *const self = LanguageTable + language;
	parserDefinition *const def = self->def;
	unsigned int n;

	for (n = 0; n < def->dependencyCount; n++)
	{
		parserDependency *dep = &def->dependencies [n];

		if (dep->type == DEPTYPE_FOREIGNER)
		{
			const langType foreigner = getNamedLanguage (dep->upperParser, 0);

			if (foreigner != LANG_IGNORE)
				notifyLanguageRegexInputEnd (foreigner);
		}
	}

	notifyRegexInputEnd(self->lregexControlBlock);
}
3803 
/*
 * Collect the cork flags needed by PARSER and, recursively, by all of its
 * subparsers.
 *
 * The result starts from the parser's own `useCork' value.  CORK_QUEUE is
 * added when doesLanguageExpectCorkInRegex() is true for the parser or when
 * the parser sets `requestAutomaticFQTag'.
 */
static unsigned int parserCorkFlags (parserDefinition *parser)
{
	unsigned int flags = 0;
	subparser *sub;

	flags |= parser->useCork;

	if (doesLanguageExpectCorkInRegex (parser->id)
	    || parser->requestAutomaticFQTag)
		flags |= CORK_QUEUE;

	/* The subparser iteration works on the language pushed here. */
	pushLanguage (parser->id);
	foreachSubparser(sub, true)
	{
		const langType subLang = getSubparserLanguage (sub);

		flags |= parserCorkFlags (LanguageTable [subLang].def);
	}
	popLanguage ();

	return flags;
}
3824 
setupLanguageSubparsersInUse(const langType language)3825 static void setupLanguageSubparsersInUse (const langType language)
3826 {
3827 	subparser *tmp;
3828 
3829 	setupSubparsersInUse ((LanguageTable + language)->slaveControlBlock);
3830 	foreachSubparser(tmp, true)
3831 	{
3832 		langType t = getSubparserLanguage (tmp);
3833 		enterSubparser (tmp);
3834 		setupLanguageSubparsersInUse(t);
3835 		leaveSubparser ();
3836 	}
3837 }
3838 
teardownLanguageSubparsersInUse(const langType language)3839 static subparser* teardownLanguageSubparsersInUse (const langType language)
3840 {
3841 	subparser *tmp;
3842 
3843 	foreachSubparser(tmp, true)
3844 	{
3845 		langType t = getSubparserLanguage (tmp);
3846 		enterSubparser (tmp);
3847 		teardownLanguageSubparsersInUse(t);
3848 		leaveSubparser ();
3849 	}
3850 	return teardownSubparsersInUse ((LanguageTable + language)->slaveControlBlock);
3851 }
3852 
/*
 * Mark PARSER as used.  The parser's initStats callback, if any, is invoked
 * only when the parser was not marked as used before and
 * Option.printTotals is greater than 1.
 */
static void	initializeParserStats (parserObject *parser)
{
	const bool firstUse = (parser->used == 0);

	if (Option.printTotals > 1 && firstUse && parser->def->initStats)
		parser->def->initStats (parser->def->id);

	parser->used = 1;
}
3859 
/*
 * Print statistics for LANGUAGE to stderr, but only if its parser object
 * has been marked as used.  The parser specific printStats callback is
 * optional; the multitable statistics are printed in either case.
 */
extern void printParserStatisticsIfUsed (langType language)
{
	parserObject *const parser = LanguageTable + language;

	if (!parser->used)
		return;

	if (parser->def->printStats)
	{
		fprintf(stderr, "\nSTATISTICS of %s\n", getLanguageName (language));
		fputs("==============================================\n", stderr);
		parser->def->printStats (language);
	}

	printLanguageMultitableStatistics (language);
}
3875 
/*
 * Run the parser for LANGUAGE over the current input, repeating passes for
 * as long as createTagsForFile() asks for a rescan.
 *
 * After a RESCAN_FAILED pass the tag file position, the tag counter and the
 * promises are rolled back to the last saved state.  After a RESCAN_APPEND
 * pass the current state becomes the new rollback point.
 *
 * If EXCLUSIVE_SUBPARSER is not NULL and teardownLanguageSubparsersInUse()
 * returns a subparser, the language of that subparser is stored there.
 *
 * Returns true if the tag file was rolled back at least once.
 */
static bool createTagsWithFallback1 (const langType language,
									 langType *exclusive_subparser)
{
	bool resized = false;
	unsigned long savedTagCount = numTagsAdded ();
	MIOPos savedPos;
	int savedPromise = getLastPromise ();
	unsigned int pass = 0;

	initializeParser (language);
	parserObject *const parser = LanguageTable + language;

	setupLanguageSubparsersInUse (language);

	const unsigned int corkFlags = parserCorkFlags (parser->def);
	const bool useCork = corkFlags & CORK_QUEUE;
	if (useCork)
		corkTagFile (corkFlags);

	if (isXtagEnabled (XTAG_PSEUDO_TAGS))
		addParserPseudoTags (language);
	initializeParserStats (parser);
	tagFilePosition (&savedPos);

	anonResetMaybe (parser);

	for (;;)
	{
		const rescanReason why = createTagsForFile (language, ++pass);

		if (why == RESCAN_NONE)
			break;

		/* Flush what the finished pass queued and start a fresh queue. */
		if (useCork)
		{
			uncorkTagFile ();
			corkTagFile (corkFlags);
		}

		switch (why)
		{
		case RESCAN_FAILED:
			/* Restore prior state of tag file. */
			setTagFilePosition (&savedPos, true);
			setNumTagsAdded (savedTagCount);
			writerRescanFailed (savedTagCount);
			resized = true;
			breakPromisesAfter (savedPromise);
			break;
		case RESCAN_APPEND:
			/* Keep what was emitted so far; move the rollback point. */
			tagFilePosition (&savedPos);
			savedTagCount = numTagsAdded ();
			savedPromise = getLastPromise ();
			break;
		default:
			break;
		}
	}

	/* Force filling allLines buffer and kick the multiline regex parser */
	if (hasLanguageMultilineRegexPatterns (language))
	{
		while (readLineFromInputFile () != NULL)
			; /* Do nothing */
	}

	if (useCork)
		uncorkTagFile ();

	subparser *const exclusive = teardownLanguageSubparsersInUse (language);
	if (exclusive_subparser && exclusive)
		*exclusive_subparser = getSubparserLanguage (exclusive);

	return resized;
}
3951 
/*
 * Run the parser for LANGUAGE on an area of the current input.
 *
 * The area and PROMISE are handed to pushNarrowedInputStream(); parsing is
 * done with createTagsWithFallback1() and the narrowed stream is popped
 * afterwards.  Returns what createTagsWithFallback1() returned.
 */
extern bool runParserInNarrowedInputStream (const langType language,
					       unsigned long startLine, long startCharOffset,
					       unsigned long endLine, long endCharOffset,
					       unsigned long sourceLineOffset,
					       int promise)
{
	verbose ("runParserInNarrowedInputStream: %s; "
			 "file: %s, "
			 "start(line: %lu, offset: %ld, srcline: %lu)"
			 " - "
			 "end(line: %lu, offset: %ld)\n",
			 getLanguageName (language),
			 getInputFileName (),
			 startLine, startCharOffset, sourceLineOffset,
			 endLine, endCharOffset);

	const bool needsMemoryStream = doesParserRequireMemoryStream (language);

	pushNarrowedInputStream (needsMemoryStream,
				 startLine, startCharOffset,
				 endLine, endCharOffset,
				 sourceLineOffset,
				 promise);

	const bool resized = createTagsWithFallback1 (language, NULL);

	popNarrowedInputStream ();

	return resized;
}
3981 
/*
 * Open FILENAME as the input file, parse it with LANGUAGE, force the
 * pending promises, emit the file tag and close the input again.
 *
 * *FAILUREINOPENNING is set to true (and false is returned) when
 * openInputFile() fails; it is set to false otherwise.
 *
 * The file tag is made under the language reported through the
 * exclusive_subparser out-parameter of createTagsWithFallback1() when one
 * was reported, and under LANGUAGE otherwise.
 *
 * Returns true if either the parsing or forcePromises() reported true.
 */
static bool createTagsWithFallback (
	const char *const fileName, const langType language,
	MIO *mio, time_t mtime, bool *failureInOpenning)
{
	langType exclusive = LANG_IGNORE;
	bool resized;

	Assert (0 <= language  &&  language < (int) LanguageCount);

	*failureInOpenning = !openInputFile (fileName, language, mio, mtime);
	if (*failureInOpenning)
		return false;

	resized = createTagsWithFallback1 (language, &exclusive);
	if (forcePromises ())
		resized = true;

	pushLanguage ((exclusive != LANG_IGNORE)? exclusive: language);
	makeFileTag (fileName);
	popLanguage ();
	closeInputFile ();

	return resized;
}
4011 
/*
 * Print "<fileName>: <parser name>" to stdout.
 *
 * When LANGUAGE is LANG_IGNORE, RSV_NONE is printed in place of a parser
 * name and Option.printLanguage is set to ((int)true) + 1.
 */
static void printGuessedParser (const char* const fileName, langType language)
{
	const char *parserName = RSV_NONE;

	if (language != LANG_IGNORE)
		parserName = getLanguageName (language);
	else
		Option.printLanguage = ((int)true) + 1;

	printf("%s: %s\n", fileName, parserName);
}
4028 
4029 #ifdef HAVE_ICONV
4030 static char **EncodingMap;
4031 static unsigned int EncodingMapMax;
4032 
addLanguageEncoding(const langType language,const char * const encoding)4033 static void addLanguageEncoding (const langType language,
4034 									const char *const encoding)
4035 {
4036 	if (language > EncodingMapMax || EncodingMapMax == 0)
4037 	{
4038 		int i;
4039 		int istart = (EncodingMapMax == 0)? 0: EncodingMapMax + 1;
4040 		EncodingMap = xRealloc (EncodingMap, (language + 1), char*);
4041 		for (i = istart;  i <= language  ;  ++i)
4042 		{
4043 			EncodingMap [i] = NULL;
4044 		}
4045 		EncodingMapMax = language;
4046 	}
4047 	if (EncodingMap [language])
4048 		eFree (EncodingMap [language]);
4049 	EncodingMap [language] = eStrdup(encoding);
4050 	if (!Option.outputEncoding)
4051 		Option.outputEncoding = eStrdup("UTF-8");
4052 }
4053 
processLanguageEncodingOption(const char * const option,const char * const parameter)4054 extern bool processLanguageEncodingOption (const char *const option, const char *const parameter)
4055 {
4056 	langType language;
4057 
4058 	language = getLanguageComponentInOption (option, "input-encoding-");
4059 	if (language == LANG_IGNORE)
4060 		return false;
4061 
4062 	addLanguageEncoding (language, parameter);
4063 	return true;
4064 }
4065 
freeEncodingResources(void)4066 extern void freeEncodingResources (void)
4067 {
4068 	if (EncodingMap)
4069 	{
4070 		unsigned int i;
4071 		for (i = 0  ;  i <= EncodingMapMax  ; ++i)
4072 		{
4073 			if (EncodingMap [i])
4074 				eFree (EncodingMap [i]);
4075 		}
4076 		eFree (EncodingMap);
4077 	}
4078 	if (Option.inputEncoding)
4079 		eFree (Option.inputEncoding);
4080 	if (Option.outputEncoding)
4081 		eFree (Option.outputEncoding);
4082 }
4083 
getLanguageEncoding(const langType language)4084 extern const char *getLanguageEncoding (const langType language)
4085 {
4086 	if (EncodingMap && language <= EncodingMapMax && EncodingMap [language])
4087 		return EncodingMap[language];
4088 	else
4089 		return Option.inputEncoding;
4090 }
4091 #endif
4092 
/*
 * Emit the parser specific pseudo tags of LANGUAGE.  The parser object's
 * pseudoTagPrinted flag ensures that this is done once per parser object.
 */
static void addParserPseudoTags (langType language)
{
	parserObject *const parser = &LanguageTable [language];

	if (parser->pseudoTagPrinted)
		return;

	for (int ptag = 0; ptag < PTAG_COUNT; ptag++)
	{
		if (isPtagParserSpecific (ptag))
			makePtagIfEnabled (ptag, language, parser);
	}

	parser->pseudoTagPrinted = 1;
}
4106 
doesParserRequireMemoryStream(const langType language)4107 extern bool doesParserRequireMemoryStream (const langType language)
4108 {
4109 	Assert (0 <= language  &&  language < (int) LanguageCount);
4110 	parserDefinition *const lang = LanguageTable [language].def;
4111 	unsigned int i;
4112 
4113 	if (lang->tagXpathTableCount > 0
4114 		|| lang->useMemoryStreamInput)
4115 	{
4116 		verbose ("%s requires a memory stream for input\n", lang->name);
4117 		return true;
4118 	}
4119 
4120 	for (i = 0; i < lang->dependencyCount; i++)
4121 	{
4122 		parserDependency *d = lang->dependencies + i;
4123 		if (d->type == DEPTYPE_SUBPARSER &&
4124 			((subparser *)(d->data))->direction & SUBPARSER_SUB_RUNS_BASE)
4125 		{
4126 			langType baseParser;
4127 			baseParser = getNamedLanguage (d->upperParser, 0);
4128 			if (doesParserRequireMemoryStream(baseParser))
4129 			{
4130 				verbose ("%s/%s requires a memory stream for input\n", lang->name,
4131 						 LanguageTable[baseParser].def->name);
4132 				return true;
4133 			}
4134 		}
4135 	}
4136 
4137 	return false;
4138 }
4139 
/*
 * Parse FILENAME.  This is parseFileWithMio() called without a MIO and
 * without client data, wrapped in trace enter/leave markers.
 */
extern bool parseFile (const char *const fileName)
{
	TRACE_ENTER_TEXT("Parsing file %s",fileName);
	const bool resized = parseFileWithMio (fileName, NULL, NULL);
	TRACE_LEAVE();
	return resized;
}
4147 
/*
 * Parse one input with LANGUAGE, bracketed by the setup and teardown of the
 * writer, the anonymous name bookkeeping and the parser trash box.
 *
 * If USESOURCEFILETAGPATH is true and the input could be opened, the writer
 * is torn down with getSourceFileTagPath(); the result is true when that
 * teardown returns true and the parsing result otherwise.  In all other
 * cases the writer is torn down with FILENAME and the teardown's return
 * value is the result.
 */
static bool parseMio (const char *const fileName, langType language, MIO* mio, time_t mtime, bool useSourceFileTagPath,
					  void *clientData)
{
	bool openFailed = false;

	setupWriter (clientData);
	setupAnon ();
	initParserTrashBox ();

	const bool resized = createTagsWithFallback (fileName, language, mio, mtime, &openFailed);

	finiParserTrashBox ();
	teardownAnon ();

	if (!useSourceFileTagPath || openFailed)
		return teardownWriter(fileName);

	if (teardownWriter (getSourceFileTagPath()))
		return true;

	return resized;
}
4171 
/*
 * Choose a parser for FILENAME and run it.
 *
 * MIO may be NULL.  In that case the language request is of type GLR_OPEN,
 * and a MIO that the request holds after language detection is released
 * here before returning.  A MIO passed in by the caller (GLR_REUSE) is not
 * released.
 *
 * When Option.printLanguage is set, only the guessed parser is printed and
 * no tags are made.  This path also goes through the common cleanup at the
 * end of the function, so that a MIO obtained for a GLR_OPEN request is
 * not leaked.
 *
 * CLIENTDATA is passed through to parseMio().
 *
 * Returns the value of parseMio(), or false when nothing was parsed.
 */
extern bool parseFileWithMio (const char *const fileName, MIO *mio,
							  void *clientData)
{
	bool tagFileResized = false;
	langType language;
	struct GetLanguageRequest req = {
		.type = mio? GLR_REUSE: GLR_OPEN,
		.fileName = fileName,
		.mio = mio,
	};
	memset (&req.mtime, 0, sizeof (req.mtime));

	language = getFileLanguageForRequest (&req);
	Assert (language != LANG_AUTO);

	if (Option.printLanguage)
		printGuessedParser (fileName, language);
	else if (language == LANG_IGNORE)
		verbose ("ignoring %s (unknown language/language disabled)\n",
			 fileName);
	else
	{
		Assert(isLanguageEnabled (language));

		if (Option.filter && ! Option.interactive)
			openTagFile ();

#ifdef HAVE_ICONV
		/* TODO: checkUTF8BOM can be used to update the encodings. */
		openConverter (getLanguageEncoding (language), Option.outputEncoding);
#endif
		tagFileResized = parseMio (fileName, language, req.mio, req.mtime, true, clientData);
		if (Option.filter && ! Option.interactive)
			closeTagFile (tagFileResized);
		addTotals (1, 0L, 0L);

#ifdef HAVE_ICONV
		closeConverter ();
#endif
	}

	/* Release the MIO of a GLR_OPEN request on every path. */
	if (req.type == GLR_OPEN && req.mio)
		mio_unref (req.mio);

	return tagFileResized;
}
4222 
parseRawBuffer(const char * fileName,unsigned char * buffer,size_t bufferSize,const langType language,void * clientData)4223 extern bool parseRawBuffer(const char *fileName, unsigned char *buffer,
4224 			 size_t bufferSize, const langType language, void *clientData)
4225 {
4226 	MIO *mio = NULL;
4227 	bool r;
4228 
4229 	if (buffer)
4230 		mio = mio_new_memory (buffer, bufferSize, NULL, NULL);
4231 
4232 	r = parseMio (fileName, language, mio, (time_t)0, false, clientData);
4233 
4234 	if (buffer)
4235 		mio_unref (mio);
4236 
4237 	return r;
4238 }
4239 
matchLanguageMultilineRegexCommon(const langType language,bool (* func)(struct lregexControlBlock *,const vString * const),const vString * const allLines)4240 static void matchLanguageMultilineRegexCommon (const langType language,
4241 											   bool (* func) (struct lregexControlBlock *, const vString* const),
4242 											   const vString* const allLines)
4243 {
4244 	subparser *tmp;
4245 
4246 	func ((LanguageTable + language)->lregexControlBlock, allLines);
4247 	foreachSubparser(tmp, true)
4248 	{
4249 		langType t = getSubparserLanguage (tmp);
4250 		enterSubparser (tmp);
4251 		matchLanguageMultilineRegexCommon (t, func, allLines);
4252 		leaveSubparser ();
4253 	}
4254 }
4255 
matchLanguageMultilineRegex(const langType language,const vString * const allLines)4256 extern void matchLanguageMultilineRegex (const langType language,
4257 										 const vString* const allLines)
4258 {
4259 	matchLanguageMultilineRegexCommon(language, matchMultilineRegex, allLines);
4260 }
4261 
matchLanguageMultitableRegex(const langType language,const vString * const allLines)4262 extern void matchLanguageMultitableRegex (const langType language,
4263 										  const vString* const allLines)
4264 {
4265 	matchLanguageMultilineRegexCommon(language, matchMultitableRegex, allLines);
4266 }
4267 
/*
 * Handle a table extending request for LANGUAGE.
 *
 * PARAMETER has the form "<dist>+<src>": the text before the first '+' is
 * the name of the dist table and the text after it is the name of the
 * source table.  Both are handed to extendRegexTable().
 *
 * A missing separator or an empty table name is reported with
 * error (FATAL, ...).  Each message names the table that is actually
 * empty: an empty prefix is the dist table, an empty suffix the source
 * table.
 */
extern void processLanguageMultitableExtendingOption (langType language, const char *const parameter)
{
	const char *const separator = strchr (parameter, '+');

	if (!separator)
		error (FATAL, "no separator(+) found: %s", parameter);

	if (separator == parameter)
		error (FATAL, "the name of dist table is empty in table extending: %s", parameter);

	const char *const src = separator + 1;
	if (!*src)
		error (FATAL, "the name of source table is empty in table extending: %s", parameter);

	/* The dist name is not NUL terminated inside PARAMETER; copy it. */
	char *const dist = eStrndup (parameter, separator - parameter);
	extendRegexTable (((LanguageTable + language)->lregexControlBlock), src, dist);
	eFree (dist);
}
4290 
lregexQueryParserAndSubparsers(const langType language,bool (* predicate)(struct lregexControlBlock *))4291 static bool lregexQueryParserAndSubparsers (const langType language, bool (* predicate) (struct lregexControlBlock *))
4292 {
4293 	bool r;
4294 	subparser *tmp;
4295 
4296 	r = predicate ((LanguageTable + language)->lregexControlBlock);
4297 	if (!r)
4298 	{
4299 		foreachSubparser(tmp, true)
4300 		{
4301 			langType t = getSubparserLanguage (tmp);
4302 			enterSubparser (tmp);
4303 			r = lregexQueryParserAndSubparsers (t, predicate);
4304 			leaveSubparser ();
4305 
4306 			if (r)
4307 				break;
4308 		}
4309 	}
4310 
4311 	return r;
4312 }
4313 
hasLanguageMultilineRegexPatterns(const langType language)4314 extern bool hasLanguageMultilineRegexPatterns (const langType language)
4315 {
4316 	return lregexQueryParserAndSubparsers (language, regexNeedsMultilineBuffer);
4317 }
4318 
4319 
addLanguageCallbackRegex(const langType language,const char * const regex,const char * const flags,const regexCallback callback,bool * disabled,void * userData)4320 extern void addLanguageCallbackRegex (const langType language, const char *const regex, const char *const flags,
4321 									  const regexCallback callback, bool *disabled, void *userData)
4322 {
4323 	addCallbackRegex ((LanguageTable +language)->lregexControlBlock, regex, flags, callback, disabled, userData);
4324 }
4325 
doesLanguageExpectCorkInRegex(const langType language)4326 extern bool doesLanguageExpectCorkInRegex (const langType language)
4327 {
4328 	bool hasScopeAction;
4329 
4330 	pushLanguage (language);
4331 	hasScopeAction = lregexQueryParserAndSubparsers (language, doesExpectCorkInRegex);
4332 	popLanguage ();
4333 
4334 	return hasScopeAction;
4335 }
4336 
matchLanguageRegex(const langType language,const vString * const line)4337 extern void matchLanguageRegex (const langType language, const vString* const line)
4338 {
4339 	subparser *tmp;
4340 
4341 	matchRegex ((LanguageTable + language)->lregexControlBlock, line);
4342 	foreachSubparser(tmp, true)
4343 	{
4344 		langType t = getSubparserLanguage (tmp);
4345 		enterSubparser (tmp);
4346 		matchLanguageRegex (t, line);
4347 		leaveSubparser ();
4348 	}
4349 }
4350 
/*
 * Pass a regex option PARAMETER of type REGPTYPE to the regex control block
 * of LANGUAGE.  Always returns true.
 */
extern bool processLanguageRegexOption (langType language,
										enum regexParserType regptype,
										const char *const parameter)
{
	parserObject *const parser = LanguageTable + language;

	processTagRegexOption (parser->lregexControlBlock, regptype, parameter);

	return true;
}
4360 
processTabledefOption(const char * const option,const char * const parameter)4361 extern bool processTabledefOption (const char *const option, const char *const parameter)
4362 {
4363 	langType language;
4364 
4365 	language = getLanguageComponentInOption (option, "_tabledef-");
4366 	if (language == LANG_IGNORE)
4367 		return false;
4368 
4369 	if (parameter == NULL || parameter[0] == '\0')
4370 		error (FATAL, "A parameter is needed after \"%s\" option", option);
4371 
4372 	addRegexTable((LanguageTable +language)->lregexControlBlock, parameter);
4373 	return true;
4374 }
4375 
useRegexMethod(const langType language)4376 extern void useRegexMethod (const langType language)
4377 {
4378 	parserDefinition* lang;
4379 
4380 	Assert (0 <= language  &&  language < (int) LanguageCount);
4381 	lang = LanguageTable [language].def;
4382 	lang->method |= METHOD_REGEX;
4383 }
4384 
useXpathMethod(const langType language)4385 static void useXpathMethod (const langType language)
4386 {
4387 	parserDefinition* lang;
4388 
4389 	Assert (0 <= language  &&  language < (int) LanguageCount);
4390 	lang = LanguageTable [language].def;
4391 	lang->method |= METHOD_XPATH;
4392 }
4393 
installTagRegexTable(const langType language)4394 static void installTagRegexTable (const langType language)
4395 {
4396 	parserObject* parser;
4397 	parserDefinition* lang;
4398 	unsigned int i;
4399 
4400 	Assert (0 <= language  &&  language < (int) LanguageCount);
4401 	parser = LanguageTable + language;
4402 	lang = parser->def;
4403 
4404 
4405 	if (lang->tagRegexTable != NULL)
4406 	{
4407 		/* ctags_cli_main() calls initRegexOptscript ().
4408 		 * However, mini-geany deasn't call ctags_cli_main().
4409 		 * So we call initRegexOptscript () here.
4410 		 */
4411 		initRegexOptscript ();
4412 
4413 	    for (i = 0; i < lang->tagRegexCount; ++i)
4414 		{
4415 			if (lang->tagRegexTable [i].mline)
4416 				addTagMultiLineRegex (parser->lregexControlBlock,
4417 									  lang->tagRegexTable [i].regex,
4418 									  lang->tagRegexTable [i].name,
4419 									  lang->tagRegexTable [i].kinds,
4420 									  lang->tagRegexTable [i].flags,
4421 									  (lang->tagRegexTable [i].disabled));
4422 			else
4423 				addTagRegex (parser->lregexControlBlock,
4424 							 lang->tagRegexTable [i].regex,
4425 							 lang->tagRegexTable [i].name,
4426 							 lang->tagRegexTable [i].kinds,
4427 							 lang->tagRegexTable [i].flags,
4428 							 (lang->tagRegexTable [i].disabled));
4429 		}
4430 	}
4431 }
4432 
installKeywordTable(const langType language)4433 static void installKeywordTable (const langType language)
4434 {
4435 	parserDefinition* lang;
4436 	unsigned int i;
4437 
4438 	Assert (0 <= language  &&  language < (int) LanguageCount);
4439 	lang = LanguageTable [language].def;
4440 
4441 	if (lang->keywordTable != NULL)
4442 	{
4443 		for (i = 0; i < lang->keywordCount; ++i)
4444 			addKeyword (lang->keywordTable [i].name,
4445 				    language,
4446 				    lang->keywordTable [i].id);
4447 	}
4448 }
4449 
installTagXpathTable(const langType language)4450 static void installTagXpathTable (const langType language)
4451 {
4452 	parserDefinition* lang;
4453 	unsigned int i, j;
4454 
4455 	Assert (0 <= language  &&  language < (int) LanguageCount);
4456 	lang = LanguageTable [language].def;
4457 
4458 	if (lang->tagXpathTableTable != NULL)
4459 	{
4460 		for (i = 0; i < lang->tagXpathTableCount; ++i)
4461 			for (j = 0; j < lang->tagXpathTableTable[i].count; ++j)
4462 				addTagXpath (language, lang->tagXpathTableTable[i].table + j);
4463 		useXpathMethod (language);
4464 	}
4465 }
4466 
uninstallTagXpathTable(const langType language)4467 static void uninstallTagXpathTable (const langType language)
4468 {
4469 	parserDefinition* lang;
4470 	unsigned int i, j;
4471 
4472 	Assert (0 <= language  &&  language < (int) LanguageCount);
4473 	lang = LanguageTable [language].def;
4474 
4475 	if (lang->tagXpathTableTable != NULL)
4476 	{
4477 		for (i = 0; i < lang->tagXpathTableCount; ++i)
4478 			for (j = 0; j < lang->tagXpathTableTable[i].count; ++j)
4479 				removeTagXpath (language, lang->tagXpathTableTable[i].table + j);
4480 	}
4481 }
4482 
getXpathTableTable(const langType language,unsigned int nth)4483 const tagXpathTableTable *getXpathTableTable (const langType language, unsigned int nth)
4484 {
4485 	parserDefinition* lang;
4486 
4487 	Assert (0 <= language  &&  language < (int) LanguageCount);
4488 	lang = LanguageTable [language].def;
4489 
4490 	Assert (nth < lang->tagXpathTableCount);
4491 	return lang->tagXpathTableTable + nth;
4492 }
4493 
getXpathFileSpecCount(const langType language)4494 extern unsigned int getXpathFileSpecCount (const langType language)
4495 {
4496 	parserDefinition* lang;
4497 
4498 	Assert (0 <= language  &&  language < (int) LanguageCount);
4499 	lang = LanguageTable [language].def;
4500 
4501 	return lang->xpathFileSpecCount;
4502 }
4503 
getXpathFileSpec(const langType language,unsigned int nth)4504 extern xpathFileSpec* getXpathFileSpec (const langType language, unsigned int nth)
4505 {
4506 	parserDefinition* lang;
4507 
4508 	Assert (0 <= language  &&  language < (int) LanguageCount);
4509 	lang = LanguageTable [language].def;
4510 
4511 	Assert (nth < lang->xpathFileSpecCount);
4512 	return lang->xpathFileSpecs + nth;
4513 }
4514 
makeKindSeparatorsPseudoTags(const langType language,const ptagDesc * pdesc)4515 extern bool makeKindSeparatorsPseudoTags (const langType language,
4516 					     const ptagDesc *pdesc)
4517 {
4518 	parserObject* parser;
4519 	parserDefinition* lang;
4520 	struct kindControlBlock *kcb;
4521 	kindDefinition *kind;
4522 	unsigned int kindCount;
4523 	unsigned int i, j;
4524 
4525 	bool r = false;
4526 
4527 	Assert (0 <= language  &&  language < (int) LanguageCount);
4528 	parser = LanguageTable + language;
4529 	lang = parser->def;
4530 	kcb = parser->kindControlBlock;
4531 	kindCount = countKinds(kcb);
4532 
4533 	if (kindCount == 0)
4534 		return r;
4535 
4536 	vString *sepval = vStringNew();
4537 	for (i = 0; i < kindCount; ++i)
4538 	{
4539 		kind = getKind (kcb, i);
4540 		for (j = 0; j < kind->separatorCount; ++j)
4541 		{
4542 			char name[3] = {[1] = '\0', [2] = '\0'};
4543 			const kindDefinition *upperKind;
4544 			const scopeSeparator *sep;
4545 
4546 			sep = kind->separators + j;
4547 
4548 			if (sep->parentKindIndex == KIND_WILDCARD_INDEX)
4549 			{
4550 				name[0] = KIND_WILDCARD_LETTER;
4551 				name[1] = kind->letter;
4552 			}
4553 			else if (sep->parentKindIndex == KIND_GHOST_INDEX)
4554 			{
4555 				/* This is root separator: no upper item is here. */
4556 				name[0] = kind->letter;
4557 			}
4558 			else
4559 			{
4560 				upperKind = getLanguageKind (language,
4561 							    sep->parentKindIndex);
4562 				if (!upperKind)
4563 					continue;
4564 
4565 				name[0] = upperKind->letter;
4566 				name[1] = kind->letter;
4567 			}
4568 
4569 
4570 			vStringClear (sepval);
4571 			vStringCatSWithEscaping (sepval, sep->separator);
4572 
4573 			r = writePseudoTag (pdesc, vStringValue (sepval),
4574 					    name, lang->name) || r;
4575 		}
4576 	}
4577 	vStringDelete (sepval);
4578 
4579 	return r;
4580 }
4581 
4582 struct makeKindDescriptionPseudoTagData {
4583 	const char* langName;
4584 	const ptagDesc *pdesc;
4585 	bool written;
4586 };
4587 
makeKindDescriptionPseudoTag(kindDefinition * kind,void * user_data)4588 static bool makeKindDescriptionPseudoTag (kindDefinition *kind,
4589 					     void *user_data)
4590 {
4591 	struct makeKindDescriptionPseudoTagData *data = user_data;
4592 	vString *letter_and_name;
4593 	vString *description;
4594 	const char *d;
4595 
4596 	letter_and_name = vStringNew ();
4597 	description = vStringNew ();
4598 
4599 	vStringPut (letter_and_name, kind -> letter);
4600 	vStringPut (letter_and_name, ',');
4601 	vStringCatS (letter_and_name, kind -> name);
4602 
4603 	d = kind->description? kind->description: kind->name;
4604 	vStringCatSWithEscapingAsPattern (description, d);
4605 	data->written |=  writePseudoTag (data->pdesc, vStringValue (letter_and_name),
4606 					  vStringValue (description),
4607 					  data->langName);
4608 
4609 	vStringDelete (description);
4610 	vStringDelete (letter_and_name);
4611 
4612 	return false;
4613 }
4614 
/*
 * Write the description pseudo tag for ROLE of KIND.
 *
 * The pseudo tag is written with the role name, the role's description
 * escaped as a pattern (the role name stands in for a missing
 * description), and the language name joined to the kind name with
 * PSEUDO_TAG_SEPARATOR.  USER_DATA points to a
 * struct makeKindDescriptionPseudoTagData.  Always returns false.
 */
static bool makeRoleDescriptionPseudoTag (kindDefinition *kind,
										  roleDefinition *role,
										  void *user_data)
{
	struct makeKindDescriptionPseudoTagData *const data = user_data;
	const char *const text = role->description? role->description: role->name;

	vString *const parserAndKind = vStringNewInit (data->langName);
	vStringCatS (parserAndKind, PSEUDO_TAG_SEPARATOR);
	vStringCatS (parserAndKind, kind->name);

	vString *const escaped = vStringNew ();
	vStringCatSWithEscapingAsPattern (escaped, text);

	if (writePseudoTag (data->pdesc, role->name,
						vStringValue (escaped),
						vStringValue (parserAndKind)))
		data->written = true;

	vStringDelete (escaped);
	vStringDelete (parserAndKind);

	return false;
}
4638 
makeKindDescriptionsPseudoTags(const langType language,const ptagDesc * pdesc)4639 extern bool makeKindDescriptionsPseudoTags (const langType language,
4640 					    const ptagDesc *pdesc)
4641 {
4642 	parserObject *parser;
4643 	struct kindControlBlock *kcb;
4644 	parserDefinition* lang;
4645 	kindDefinition *kind;
4646 	unsigned int kindCount, i;
4647 	struct makeKindDescriptionPseudoTagData data;
4648 
4649 	Assert (0 <= language  &&  language < (int) LanguageCount);
4650 	parser = LanguageTable + language;
4651 	kcb = parser->kindControlBlock;
4652 	lang = parser->def;
4653 
4654 	kindCount = countKinds(kcb);
4655 
4656 	data.langName = lang->name;
4657 	data.pdesc = pdesc;
4658 	data.written = false;
4659 
4660 	for (i = 0; i < kindCount; ++i)
4661 	{
4662 		if (!isLanguageKindEnabled (language, i))
4663 			continue;
4664 
4665 		kind = getKind (kcb, i);
4666 		makeKindDescriptionPseudoTag (kind, &data);
4667 	}
4668 
4669 	return data.written;
4670 }
4671 
/*
 * Write the description pseudo tag for field F.
 *
 * Fields without a name (NULL or empty) are skipped and false is returned.
 * The language name passed to writePseudoTag() is NULL when LANGUAGE is
 * LANG_IGNORE.  Returns what writePseudoTag() returned.
 */
static bool makeFieldDescriptionPseudoTag (const langType language,
										   fieldType f,
										   const ptagDesc *pdesc)
{
	const char *const name = getFieldName (f);

	if (!name || *name == '\0')
		return false;

	vString *const escaped = vStringNew ();
	vStringCatSWithEscapingAsPattern (escaped, getFieldDescription (f));

	const bool written = writePseudoTag (pdesc, name,
										 vStringValue (escaped),
										 language == LANG_IGNORE? NULL: getLanguageName (language));

	vStringDelete (escaped);
	return written;
}
4693 
/*
 * Write a description pseudo tag for every enabled field owned by
 * LANGUAGE.  Returns true if at least one pseudo tag was written.
 */
extern bool makeFieldDescriptionsPseudoTags (const langType language,
											 const ptagDesc *pdesc)
{
	bool anyWritten = false;

	for (int f = 0; f < countFields (); f++)
	{
		if (getFieldOwner (f) != language || !isFieldEnabled (f))
			continue;

		if (makeFieldDescriptionPseudoTag (language, f, pdesc))
			anyWritten = true;
	}

	return anyWritten;
}
4709 
/*
 * Write the description pseudo tag for extra (xtag) X.
 *
 * Extras without a name (NULL or empty) are skipped and false is returned.
 * The language name passed to writePseudoTag() is NULL when LANGUAGE is
 * LANG_IGNORE.  Returns what writePseudoTag() returned.
 */
static bool makeExtraDescriptionPseudoTag (const langType language,
										   xtagType x,
										   const ptagDesc *pdesc)
{
	const char *const name = getXtagName (x);

	if (!name || *name == '\0')
		return false;

	vString *const escaped = vStringNew ();
	vStringCatSWithEscapingAsPattern (escaped, getXtagDescription (x));

	const bool written = writePseudoTag (pdesc, name,
										 vStringValue (escaped),
										 language == LANG_IGNORE? NULL: getLanguageName (language));

	vStringDelete (escaped);
	return written;
}
4731 
/*
 * Write a description pseudo tag for every enabled extra (xtag) owned by
 * LANGUAGE.  Returns true if at least one pseudo tag was written.
 */
extern bool makeExtraDescriptionsPseudoTags (const langType language,
											 const ptagDesc *pdesc)
{
	bool anyWritten = false;

	for (int x = 0; x < countXtags (); x++)
	{
		if (getXtagOwner (x) != language || !isXtagEnabled (x))
			continue;

		if (makeExtraDescriptionPseudoTag (language, x, pdesc))
			anyWritten = true;
	}

	return anyWritten;
}
4747 
makeRoleDescriptionsPseudoTags(const langType language,const ptagDesc * pdesc)4748 extern bool makeRoleDescriptionsPseudoTags (const langType language,
4749 											const ptagDesc *pdesc)
4750 {
4751 	parserObject *parser;
4752 	struct kindControlBlock *kcb;
4753 	parserDefinition* lang;
4754 	kindDefinition *kind;
4755 	struct makeKindDescriptionPseudoTagData data;
4756 
4757 	Assert (0 <= language  &&  language < (int) LanguageCount);
4758 	parser = LanguageTable + language;
4759 	kcb = parser->kindControlBlock;
4760 	lang = parser->def;
4761 
4762 	unsigned int kindCount = countKinds(kcb);
4763 
4764 	data.langName = lang->name;
4765 	data.pdesc = pdesc;
4766 	data.written = false;
4767 
4768 	for (unsigned int i = 0; i < kindCount; ++i)
4769 	{
4770 		if (!isLanguageKindEnabled (language, i))
4771 			continue;
4772 
4773 		kind = getKind (kcb, i);
4774 
4775 		unsigned int roleCount = countRoles (kcb, i);
4776 		for (unsigned int j = 0; j < roleCount; ++j)
4777 		{
4778 			if (isRoleEnabled (kcb, i, j))
4779 			{
4780 				roleDefinition *role = getRole (kcb, i, j);
4781 				makeRoleDescriptionPseudoTag (kind, role, &data);
4782 			}
4783 		}
4784 	}
4785 
4786 	return data.written;
4787 }
4788 
4789 /*
4790 *   Copyright (c) 2016, Szymon Tomasz Stefanek
4791 *
4792 *   This source code is released for free distribution under the terms of the
4793 *   GNU General Public License version 2 or (at your option) any later version.
4794 *
4795 *   Anonymous name generator
4796 */
/* Parsers whose anonymous-identifier counter has already been reset by
 * anonResetMaybe(); created in setupAnon(), destroyed in teardownAnon(). */
static ptrArray *parsersUsedInCurrentInput;
4798 
setupAnon(void)4799 static void setupAnon (void)
4800 {
4801 	parsersUsedInCurrentInput = ptrArrayNew (NULL);
4802 }
4803 
teardownAnon(void)4804 static void teardownAnon (void)
4805 {
4806 	ptrArrayDelete (parsersUsedInCurrentInput);
4807 }
4808 
/*
 * Reset the anonymous-identifier counter of `parser' the first time the
 * parser is seen, and record it in parsersUsedInCurrentInput so that later
 * calls for the same parser leave the counter untouched.
 */
static void anonResetMaybe (parserObject *parser)
{
	if (!ptrArrayHas (parsersUsedInCurrentInput, parser))
	{
		parser->anonymousIdentiferId = 0;
		ptrArrayAdd (parsersUsedInCurrentInput, parser);
	}
}
4817 
/*
 * Hash a NUL-terminated byte string: start from 5381 and, for every byte,
 * multiply the running value by 33 and add the byte.  Arithmetic is done in
 * unsigned int and therefore wraps on overflow.
 */
static unsigned int anonHash(const unsigned char *str)
{
	unsigned int h = 5381;
	const unsigned char *p;

	for (p = str; *p != '\0'; ++p)
		h = h * 33 + *p;

	return h;
}
4828 
/*
 * Store the hash of `filename' (see anonHash()) into `buf' as a zero-padded,
 * lower-case hexadecimal string of at least 8 digits.
 *
 * `buf' must have room for 9 bytes (8 digits plus the terminating NUL).
 * The write is bounded to those 9 bytes, so a platform whose unsigned int
 * needs more than 8 hex digits gets a truncated string instead of a buffer
 * overrun.
 */
extern void anonHashString (const char *filename, char buf[9])
{
	/* `buf' is a pointer here, so sizeof cannot be used for its size. */
	snprintf(buf, 9, "%08x", anonHash((const unsigned char *)filename));
}
4833 
4834 
/*
 * Append a generated anonymous name for `kind' to `buffer'.
 * Same as anonGenerate() called without a prefix.
 */
extern void anonConcat (vString *buffer, int kind)
{
	const char *const noPrefix = NULL;

	anonGenerate (buffer, noPrefix, kind);
}
4839 
/*
 * Generate an anonymous identifier for the current input language.
 *
 * The per-parser counter anonymousIdentiferId is incremented first.  When
 * `prefix' is non-NULL it is copied into `buffer' with vStringCopyS(); then
 * the following text is appended to `buffer':
 *
 *   <hash of the input file name><counter in hex><kind in hex>
 *
 * where the counter and the kind are each printed with at least two
 * hexadecimal digits.
 */
extern void anonGenerate (vString *buffer, const char *prefix, int kind)
{
	parserObject* parser = LanguageTable + getInputLanguage ();
	parser -> anonymousIdentiferId ++;

	char szNum[32];
	char buf [9];

	if (prefix)
		vStringCopyS(buffer, prefix);

	anonHashString (getInputFileName(), buf);

	/* Bounded formatting; %x requires an unsigned argument, hence the
	 * explicit conversion of `kind'. */
	snprintf(szNum, sizeof szNum, "%s%02x%02x", buf,
			 parser -> anonymousIdentiferId, (unsigned int) kind);
	vStringCatS(buffer,szNum);
}
4855 
anonGenerateNew(const char * prefix,int kind)4856 extern vString *anonGenerateNew (const char *prefix, int kind)
4857 {
4858 	vString *buffer = vStringNew ();
4859 
4860 	anonGenerate (buffer, prefix, kind);
4861 	return buffer;
4862 }
4863 
4864 
applyParameter(const langType language,const char * name,const char * args)4865 extern void applyParameter (const langType language, const char *name, const char *args)
4866 {
4867 	parserDefinition* parser;
4868 
4869 
4870 	Assert (0 <= language  &&  language < (int) LanguageCount);
4871 
4872 	initializeParserOne (language);
4873 	parser = LanguageTable [language].def;
4874 
4875 	if (parser->parameterHandlerTable)
4876 	{
4877 		unsigned int i;
4878 
4879 		for (i = 0; i < parser->parameterHandlerCount; i++)
4880 		{
4881 			if (strcmp (parser->parameterHandlerTable [i].name, name) == 0)
4882 			{
4883 				parser->parameterHandlerTable [i].handleParameter (language, name, args);
4884 				return;
4885 			}
4886 		}
4887 	}
4888 
4889 	error (FATAL, "no such parameter in %s: %s", parser->name, name);
4890 }
4891 
/*
 * Iterate over the subparsers of the current input language.
 *
 * Pass NULL as `last' to start the iteration, or the previously returned
 * subparser to continue it.  Subparsers whose language is disabled are
 * skipped; unless `includingNoneCraftedParser' is true, subparsers whose
 * parser definition has METHOD_NOT_CRAFTED set are skipped as well.
 * Returns NULL when no further subparser qualifies.
 */
extern subparser *getNextSubparser(subparser *last,
								   bool includingNoneCraftedParser)
{
	parserObject *const parser = LanguageTable + getInputLanguage ();
	subparser *cand = last;

	for (;;)
	{
		/* cand can be NULL only on the first pass, when last was NULL. */
		cand = (cand == NULL)
			? getFirstSubparser(parser->slaveControlBlock)
			: cand->next;

		if (cand == NULL)
			return NULL;

		const langType t = getSubparserLanguage(cand);

		if (!isLanguageEnabled (t))
			continue;

		if (includingNoneCraftedParser
			|| (LanguageTable [t].def->method & METHOD_NOT_CRAFTED) == 0)
			return cand;
	}
}
4916 
/*
 * Iterate over the slave parsers of the current input language.
 *
 * With `last' == NULL the first slave parser of the language's slave control
 * block is returned; otherwise the successor of `last'.
 */
extern slaveParser *getNextSlaveParser(slaveParser *last)
{
	if (last != NULL)
		return last->next;

	return getFirstSlaveParser(LanguageTable [getInputLanguage ()].slaveControlBlock);
}
4930 
scheduleRunningBaseparser(int dependencyIndex)4931 extern void scheduleRunningBaseparser (int dependencyIndex)
4932 {
4933 	langType current = getInputLanguage ();
4934 	parserDefinition *current_parser = LanguageTable [current].def;
4935 	parserDependency *dep = NULL;
4936 
4937 	if (dependencyIndex == RUN_DEFAULT_SUBPARSERS)
4938 	{
4939 		for (unsigned int i = 0; i < current_parser->dependencyCount; ++i)
4940 			if (current_parser->dependencies[i].type == DEPTYPE_SUBPARSER)
4941 			{
4942 				dep = current_parser->dependencies + i;
4943 				break;
4944 			}
4945 	}
4946 	else
4947 		dep = current_parser->dependencies + dependencyIndex;
4948 
4949 	if (dep == NULL)
4950 		return;
4951 
4952 	const char *base_name = dep->upperParser;
4953 	langType base = getNamedLanguage (base_name, 0);
4954 	parserObject *base_parser = LanguageTable + base;
4955 
4956 	if (dependencyIndex == RUN_DEFAULT_SUBPARSERS)
4957 		useDefaultSubparsers(base_parser->slaveControlBlock);
4958 	else
4959 		useSpecifiedSubparser (base_parser->slaveControlBlock,
4960 							   dep->data);
4961 
4962 	if (!isLanguageEnabled (base))
4963 	{
4964 		enableLanguage (base, true);
4965 		base_parser->dontEmit = true;
4966 		verbose ("force enable \"%s\" as base parser\n", base_parser->def->name);
4967 	}
4968 
4969 	{
4970 		subparser *tmp;
4971 
4972 		verbose ("scheduleRunningBaseparser %s with subparsers: ", base_name);
4973 		pushLanguage (base);
4974 		foreachSubparser(tmp, true)
4975 		{
4976 			langType t = getSubparserLanguage (tmp);
4977 			verbose ("%s ", getLanguageName (t));
4978 		}
4979 		popLanguage ();
4980 		verbose ("\n");
4981 	}
4982 
4983 
4984 	makePromise(base_name, THIN_STREAM_SPEC);
4985 }
4986 
isParserMarkedNoEmission(void)4987 extern bool isParserMarkedNoEmission (void)
4988 {
4989 	langType lang = getInputLanguage();
4990 	parserObject *parser = LanguageTable + lang;
4991 
4992 	return parser->dontEmit;
4993 }
4994 
4995 
getSubparserRunningBaseparser(void)4996 extern subparser* getSubparserRunningBaseparser (void)
4997 {
4998 	langType current = getInputLanguage ();
4999 	parserObject *current_parser = LanguageTable + current;
5000 	subparser *s = getFirstSubparser (current_parser->slaveControlBlock);
5001 
5002 	if (s && s->schedulingBaseparserExplicitly)
5003 		return s;
5004 	else
5005 		return NULL;
5006 }
5007 
printLanguageSubparsers(const langType language,bool withListHeader,bool machinable,FILE * fp)5008 extern void printLanguageSubparsers (const langType language,
5009 									 bool withListHeader, bool machinable, FILE *fp)
5010 {
5011 	for (int i = 0; i < (int) LanguageCount; i++)
5012 		initializeParserOne (i);
5013 
5014 	struct colprintTable * table = subparserColprintTableNew();
5015 	parserObject *parser;
5016 
5017 	if (language == LANG_AUTO)
5018 	{
5019 		for (int i = 0; i < (int) LanguageCount; i++)
5020 		{
5021 			parser = LanguageTable + i;
5022 			if (parser->def->invisible)
5023 				continue;
5024 
5025 			subparserColprintAddSubparsers (table,
5026 											parser->slaveControlBlock);
5027 		}
5028 	}
5029 	else
5030 	{
5031 		parser = (LanguageTable + language);
5032 		subparserColprintAddSubparsers (table,
5033 										parser->slaveControlBlock);
5034 	}
5035 
5036 	subparserColprintTablePrint (table,
5037 								 withListHeader, machinable,
5038 								 fp);
5039 	colprintTableDelete (table);
5040 }
5041 
printLangdefFlags(bool withListHeader,bool machinable,FILE * fp)5042 extern void printLangdefFlags (bool withListHeader, bool machinable, FILE *fp)
5043 {
5044 	struct colprintTable * table;
5045 
5046 	table = flagsColprintTableNew ();
5047 
5048 	flagsColprintAddDefinitions (table, PreLangDefFlagDef, ARRAY_SIZE (PreLangDefFlagDef));
5049 
5050 	flagsColprintTablePrint (table, withListHeader, machinable, fp);
5051 	colprintTableDelete(table);
5052 }
5053 
printKinddefFlags(bool withListHeader,bool machinable,FILE * fp)5054 extern void printKinddefFlags (bool withListHeader, bool machinable, FILE *fp)
5055 {
5056 	struct colprintTable * table;
5057 
5058 	table = flagsColprintTableNew ();
5059 
5060 	flagsColprintAddDefinitions (table, PreKindDefFlagDef, ARRAY_SIZE (PreKindDefFlagDef));
5061 
5062 	flagsColprintTablePrint (table, withListHeader, machinable, fp);
5063 	colprintTableDelete(table);
5064 }
5065 
/*
 * Forward to printMultitableStatistics() with the regex control block of
 * `language'.
 */
extern void printLanguageMultitableStatistics (langType language)
{
	printMultitableStatistics (LanguageTable [language].lregexControlBlock);
}
5071 
addLanguageRegexTable(const langType language,const char * name)5072 extern void addLanguageRegexTable (const langType language, const char *name)
5073 {
5074 	parserObject* const parser = LanguageTable + language;
5075 	addRegexTable (parser->lregexControlBlock, name);
5076 }
5077 
addLanguageTagMultiTableRegex(const langType language,const char * const table_name,const char * const regex,const char * const name,const char * const kinds,const char * const flags,bool * disabled)5078 extern void addLanguageTagMultiTableRegex(const langType language,
5079 										  const char* const table_name,
5080 										  const char* const regex,
5081 										  const char* const name, const char* const kinds, const char* const flags,
5082 										  bool *disabled)
5083 {
5084 	parserObject* const parser = LanguageTable + language;
5085 	addTagMultiTableRegex (parser->lregexControlBlock, table_name, regex,
5086 						   name, kinds, flags, disabled);
5087 }
5088 
/*
 * Forward to addOptscriptToHook() with the regex control block of
 * `language', attaching the script source `src' to `hook'.
 */
extern void addLanguageOptscriptToHook (langType language, enum scriptHook hook, const char *const src)
{
	parserObject *const parser = LanguageTable + language;

	addOptscriptToHook (parser->lregexControlBlock, hook, src);
}
5093 
processHookOption(const char * const option,const char * const parameter,const char * prefix,enum scriptHook hook)5094 static bool processHookOption (const char *const option, const char *const parameter, const char *prefix,
5095 							   enum scriptHook hook)
5096 {
5097 	langType language = getLanguageComponentInOption (option, prefix);
5098 	if (language == LANG_IGNORE)
5099 		return false;
5100 
5101 	if (parameter == NULL || parameter[0] == '\0')
5102 		error (FATAL, "A parameter is needed after \"%s\" option", option);
5103 
5104 	const char * code = flagsEval (parameter, NULL, 0, NULL);
5105 	if (code == NULL)
5106 		error (FATAL, "Cannot recognized a code block surrounded by `{{' and `}}' after \"%s\" option", option);
5107 	addLanguageOptscriptToHook (language, hook, code);
5108 
5109 	return true;
5110 }
5111 
processPreludeOption(const char * const option,const char * const parameter)5112 extern bool processPreludeOption (const char *const option, const char *const parameter)
5113 {
5114 	return processHookOption (option, parameter, "_prelude-", SCRIPT_HOOK_PRELUDE);
5115 }
5116 
processSequelOption(const char * const option,const char * const parameter)5117 extern bool processSequelOption (const char *const option, const char *const parameter)
5118 {
5119 	return processHookOption (option, parameter, "_sequel-", SCRIPT_HOOK_SEQUEL);
5120 }
5121 
processPretendOption(const char * const option,const char * const parameter)5122 extern bool processPretendOption (const char *const option, const char *const parameter)
5123 {
5124 	langType new_language, old_language;
5125 
5126 #define pretendOptionPrefix "_pretend-"
5127 	new_language = getLanguageComponentInOptionFull (option, pretendOptionPrefix, true);
5128 	if (new_language == LANG_IGNORE)
5129 		return false;
5130 
5131 	if (parameter == NULL || parameter[0] == '\0')
5132 		error (FATAL, "A parameter is needed after \"%s\" option", option);
5133 
5134 	old_language = getNamedLanguageFull (parameter, 0, true, false);
5135 	if (old_language == LANG_IGNORE)
5136 		error (FATAL, "Unknown language \"%s\" in option \"--%s=%s\"",
5137 			   parameter, option, parameter);
5138 
5139 	if (LanguageTable [new_language].pretendingAsLanguage != LANG_IGNORE)
5140 	{
5141 		error (FATAL, "%s parser pretends as %s already\n",
5142 			   getLanguageNameFull (new_language, true),
5143 			   getLanguageNameFull (LanguageTable [new_language].pretendingAsLanguage, true));
5144 	}
5145 	if (LanguageTable [old_language].pretendedAsLanguage != LANG_IGNORE)
5146 	{
5147 		error (FATAL, "%s parser is pretended as %s already\n",
5148 			   getLanguageNameFull (old_language, true),
5149 			   getLanguageNameFull (LanguageTable [old_language].pretendedAsLanguage, true));
5150 	}
5151 
5152 	verbose ("%s pretends %s\n",
5153 			 getLanguageNameFull (new_language, true),
5154 			 getLanguageNameFull (old_language, true));
5155 
5156 	LanguageTable [new_language].pretendingAsLanguage = old_language;
5157 	LanguageTable [old_language].pretendedAsLanguage  = new_language;
5158 
5159 	verbose ("force enabling %s\n",
5160 			 getLanguageNameFull (new_language, true));
5161 	enableLanguage (new_language, true);
5162 
5163 	verbose ("force disabling %s\n",
5164 			 getLanguageNameFull (old_language, true));
5165 	enableLanguage (old_language, false);
5166 
5167 	return true;
5168 }
5169 
/*
 * Return what parserCorkFlags() reports for the parser definition of `lang'.
 */
extern unsigned int getLanguageCorkUsage (langType lang)
{
	return parserCorkFlags (LanguageTable [lang].def);
}
5175 
5176 /*
5177  * The universal fallback parser.
5178  * If any parser doesn't handle the input, this parser is
5179  * used for the input when --languages=+Unknown is given.
5180  * writer-etags enables this parser implicitly.
5181  */
FallbackParser(void)5182 static parserDefinition *FallbackParser (void)
5183 {
5184 	parserDefinition *const def = parserNew ("Unknown");
5185 	def->extensions = NULL;
5186 	def->kindTable = NULL;
5187 	def->kindCount = 0;
5188 
5189 	/* A user can extend this parser with --regex-Unknown=...
5190 	 * or --langdef=MyParser{base=Unknown}.
5191 	 *
5192 	 * TODO: if following conditions are met, dontFindTags()
5193 	 * defined below can be used.
5194 	 * - any regex pattern is not defined,
5195 	 * - any sub parser is not defined, and
5196 	 * - end: field is not enabled.
5197 	 */
5198 	def->parser = findRegexTags;
5199 	def->enabled = 0;
5200 	def->method = METHOD_REGEX;
5201 	return def;
5202 }
5203 
5204 /*
5205  * A dummy parser for printing pseudo tags in xref output
5206  */
static void dontFindTags (void)
{
	/* Intentionally empty: installed as the parser callback of
	 * CTagsParser(), which has nothing to scan for. */
}
5210 
/* Kind table of the "UniversalCtags" parser (see CTagsParser): a single
 * kind, letter 'p', named "ptag". */
static kindDefinition CtagsKinds[] = {
	{true, 'p', "ptag", "pseudo tags"},
};
5214 
CTagsParser(void)5215 static parserDefinition *CTagsParser (void)
5216 {
5217 	parserDefinition *const def = parserNew ("UniversalCtags");
5218 	def->extensions = NULL;
5219 	def->kindTable = CtagsKinds;
5220 	def->kindCount = ARRAY_SIZE(CtagsKinds);
5221 	def->parser = dontFindTags;
5222 	def->invisible = true;
5223 	return def;
5224 }
5225 
5226 /*
5227  * A parser for CTagsSelfTest (CTST)
5228  */
/* Name of the self-test parser: used by CTagsSelfTestParser() to create the
 * parser and by createCTSTTags() when it calls makePromise(). */
#define SELF_TEST_PARSER "CTagsSelfTest"
#if defined(DEBUG) && defined(HAVE_SECCOMP)
/* Local declaration for the getppid() call made by createCTSTTags() in
 * DEBUG + seccomp builds; the call's result is not used there. */
extern void getppid(void);
#endif

/* Set to true by initStatsCTST(); enables counting in createCTSTTags(). */
static bool CTST_GatherStats;
/* Incremented by createCTSTTags() while CTST_GatherStats is set; printed by
 * printStatsCTST(). */
static int CTST_num_handled_char;
5236 
/* Kind indices of the self-test parser.  createCTSTTags() uses these values
 * both as indices into CTST_Kinds and as case labels, so the order here must
 * stay in step with the entries of CTST_Kinds.  KIND_COUNT is the number of
 * kinds, not a kind itself. */
typedef enum {
	K_BROKEN,
	K_NO_LETTER,
	K_NO_LONG_NAME,
	K_NOTHING_SPECIAL,
	K_GUEST_BEGINNING,
	K_GUEST_END,
#if defined(DEBUG) && defined(HAVE_SECCOMP)
	/* Only present in DEBUG + seccomp builds, like its CTST_Kinds entry. */
	K_CALL_GETPPID,
#endif
	K_DISABLED,
	K_ENABLED,
	K_ROLES,
	K_ROLES_DISABLED,
	K_FIELD_TESTING,
	K_TRIGGER_NOTICE,
	KIND_COUNT
} CTST_Kind;
5255 
/* Role index for the 'b' (broken tag) kind; presumably an index into
 * CTST_BrokenRoles -- it is not referenced elsewhere in this part of the
 * file. */
typedef enum {
	R_BROKEN_REF,
} CTST_BrokenRole;

/* Roles attached to the 'b' kind in CTST_Kinds. */
static roleDefinition CTST_BrokenRoles [] = {
	{true, "broken", "broken" },
};
5263 
/* Role indices used by createCTSTTags() for K_DISABLED tags; the order
 * matches the entries of CTST_DisabledKindRoles. */
typedef enum {
	R_DISABLED_KIND_DISABLED_ROLE,
	R_DISABLED_KIND_ENABLED_ROLE,
} CTST_DisabledKindRole;

/* Roles attached to the 'd' (disabled) kind in CTST_Kinds: one role that
 * starts out disabled and one that starts out enabled. */
static roleDefinition CTST_DisabledKindRoles [] = {
	{ false, "disabled", "disabled role attached to disabled kind" },
	{ true,  "enabled",  "enabled role attached to disabled kind"  },
};
5273 
/* Role indices used by createCTSTTags() for K_ENABLED tags; the order
 * matches the entries of CTST_EnabledKindRoles. */
typedef enum {
	R_ENABLED_KIND_DISABLED_ROLE,
	R_ENABLED_KIND_ENABLED_ROLE,
} CTST_EnabledKindRole;

/* Roles attached to the 'e' (enabled) kind in CTST_Kinds: one role that
 * starts out disabled and one that starts out enabled. */
static roleDefinition CTST_EnabledKindRoles [] = {
	{ false, "disabled", "disabled role attached to enabled kind" },
	{ true,  "enabled",  "enabled role attached to enabled kind"  },
};
5283 
/* Role indices passed to assignRole() by createCTSTTags() for K_ROLES tags;
 * the order matches the entries of CTST_RolesKindRoles. */
typedef enum {
	R_ROLES_KIND_A_ROLE,
	R_ROLES_KIND_B_ROLE,
	R_ROLES_KIND_C_ROLE,
	R_ROLES_KIND_D_ROLE,
} CTST_RolesKindRole;

/* Roles attached to the 'r' (roles) kind in CTST_Kinds; role "c" is the only
 * one that starts out disabled. */
static roleDefinition CTST_RolesKindRoles [] = {
	{ true,  "a", "A role" },
	{ true,  "b", "B role" },
	{ false, "c", "C role" },
	{ true,  "d", "D role"  },
};
5297 
/* Role indices passed to initRefTagEntry() by createCTSTTags() for
 * K_ROLES_DISABLED tags; the order matches CTST_RolesDisabledKindRoles. */
typedef enum {
	R_ROLES_DISABLED_KIND_A_ROLE,
	R_ROLES_DISABLED_KIND_B_ROLE,
} CTST_RolesDisableKindRole;


/* Roles attached to the 'R' (rolesDisabled) kind in CTST_Kinds. */
static roleDefinition CTST_RolesDisabledKindRoles [] = {
	{ true,  "A", "A role" },
	{ true,  "B", "B role" },
};
5308 
/* Kind table of the self-test parser, indexed by CTST_Kind.  The entries
 * must appear in the same order as the CTST_Kind enumerators (including the
 * conditionally compiled one).  createCTSTTags() matches the first character
 * of each input line against the letters given here. */
static kindDefinition CTST_Kinds[KIND_COUNT] = {
	/* `a' is reserved for kinddef testing */
	{true, 'b', "broken tag", "name with unwanted characters",
	 .referenceOnly = false, ATTACH_ROLES (CTST_BrokenRoles) },
	{true, KIND_NULL_LETTER, "no letter", "kind with no letter"
	 /* use '@' when testing. */
	},
	{true, 'L', NULL, "kind with no long name" },
	{true, 'N', "nothingSpecial", "emit a normal tag" },
	{true, 'B', NULL, "beginning of an area for a guest" },
	{true, 'E', NULL, "end of an area for a guest" },
#if defined(DEBUG) && defined(HAVE_SECCOMP)
	{true, 'P', "callGetPPid", "trigger calling getppid(2) that seccomp sandbox disallows"},
#endif
	{false,'d', "disabled", "a kind disabled by default",
	 .referenceOnly = false, ATTACH_ROLES (CTST_DisabledKindRoles)},
	{true, 'e', "enabled", "a kind enabled by default",
	 .referenceOnly = false, ATTACH_ROLES (CTST_EnabledKindRoles)},
	{true, 'r', "roles", "emit a tag with multi roles",
	 .referenceOnly = true, ATTACH_ROLES (CTST_RolesKindRoles)},
	{false, 'R', "rolesDisabled", "emit a tag with multi roles(disabled by default)",
	 .referenceOnly = true, ATTACH_ROLES (CTST_RolesDisabledKindRoles)},
	{true,  'f', "fieldMaker", "tag for testing field:" },
	{true,  'n', "triggerNotice", "trigger notice output"},
};
5334 
/* Indices into CTSTFields; COUNT_FIELD is the number of fields. */
typedef enum {
	F_BOOLEAN_FIELD,
	F_BOOLEAN_AND_STRING_FIELD,
	COUNT_FIELD
} CTSTField;

/* Parser-specific fields of the self-test parser, indexed by CTSTField.
 * createCTSTTags() attaches them to the tags it makes for K_FIELD_TESTING;
 * CTagsSelfTestParser() installs the table as the parser's fieldTable. */
static fieldDefinition CTSTFields[COUNT_FIELD] = {
	{ .name = "bField",
	  .description = "field for testing boolean type",
	  .dataType = FIELDTYPE_BOOL,
	  .enabled = true,
	},
	{ .name = "sbField",
	  .description = "field for testing string|boolean type",
	  .dataType = FIELDTYPE_STRING|FIELDTYPE_BOOL,
	  .enabled = true,
	},
};
5353 
createCTSTTags(void)5354 static void createCTSTTags (void)
5355 {
5356 	int i;
5357 	const unsigned char *line;
5358 	tagEntryInfo e;
5359 
5360 	unsigned long lb = 0;
5361 	unsigned long le = 0;
5362 
5363 	int found_enabled_disabled[2] = {0, 0};
5364 
5365 	TRACE_ENTER_TEXT("Parsing starts");
5366 
5367 	while ((line = readLineFromInputFile ()) != NULL)
5368 	{
5369 		int c = line[0];
5370 
5371 		for (i = 0; i < KIND_COUNT; i++)
5372 			if ((c == CTST_Kinds[i].letter && i != K_NO_LETTER)
5373 				|| (c == '@' && i == K_NO_LETTER))
5374 			{
5375 				if (CTST_GatherStats)
5376 					CTST_num_handled_char++;
5377 
5378 				switch (i)
5379 				{
5380 					case K_BROKEN:
5381 						initTagEntry (&e, "one\nof\rbroken\tname", i);
5382 						e.extensionFields.scopeKindIndex = K_BROKEN;
5383 						e.extensionFields.scopeName = "\\Broken\tContext";
5384 						makeTagEntry (&e);
5385 						initTagEntry (&e, "only\nnewline", i);
5386 						makeTagEntry (&e);
5387 						initTagEntry (&e, "only\ttab", i);
5388 						makeTagEntry (&e);
5389 						initTagEntry (&e, "newline-in-scope", i);
5390 						e.extensionFields.scopeKindIndex = K_BROKEN;
5391 						e.extensionFields.scopeName = "parent\nscope";
5392 						makeTagEntry (&e);
5393 						initTagEntry (&e, "tab-in-scope", i);
5394 						e.extensionFields.scopeKindIndex = K_BROKEN;
5395 						e.extensionFields.scopeName = "parent\tscope";
5396 						makeTagEntry (&e);
5397 						break;
5398 					case K_NO_LETTER:
5399 						initTagEntry (&e, "abnormal kindDefinition testing (no letter)", i);
5400 						makeTagEntry (&e);
5401 						break;
5402 					case K_NO_LONG_NAME:
5403 						initTagEntry (&e, "abnormal kindDefinition testing (no long name)", i);
5404 						makeTagEntry (&e);
5405 						break;
5406 					case K_NOTHING_SPECIAL:
5407 						if (!lb)
5408 						{
5409 							initTagEntry (&e, "NOTHING_SPECIAL", i);
5410 							makeTagEntry (&e);
5411 						}
5412 						break;
5413 					case K_GUEST_BEGINNING:
5414 						lb = getInputLineNumber ();
5415 						break;
5416 					case K_GUEST_END:
5417 						le = getInputLineNumber ();
5418 						makePromise (SELF_TEST_PARSER, lb + 1, 0, le, 0, lb + 1);
5419 						break;
5420 #if defined(DEBUG) && defined(HAVE_SECCOMP)
5421 				    case K_CALL_GETPPID:
5422 						getppid();
5423 						break;
5424 #endif
5425 				    case K_DISABLED:
5426 				    case K_ENABLED:
5427 						{
5428 							int role;
5429 							char *name;
5430 							if (found_enabled_disabled[i == K_DISABLED]++ == 0)
5431 							{
5432 								role = ROLE_DEFINITION_INDEX;
5433 								name = (i == K_DISABLED)
5434 									? "disable-kind-no-role"
5435 									: "enabled-kind-no-role";
5436 							}
5437 							else if (found_enabled_disabled[i == K_DISABLED]++ == 1)
5438 							{
5439 								role = (i == K_DISABLED)
5440 									? R_DISABLED_KIND_DISABLED_ROLE
5441 									: R_ENABLED_KIND_DISABLED_ROLE;
5442 								name = (i == K_DISABLED)
5443 									? "disable-kind-disabled-role"
5444 									: "enabled-kind-disabled-role";
5445 							}
5446 							else
5447 							{
5448 								role = (i == K_DISABLED)
5449 									? R_DISABLED_KIND_ENABLED_ROLE
5450 									: R_ENABLED_KIND_ENABLED_ROLE;
5451 								name = (i == K_DISABLED)
5452 									? "disable-kind-enabled-role"
5453 									: "enabled-kind-enabled-role";
5454 							}
5455 							initRefTagEntry (&e, name, i, role);
5456 							makeTagEntry (&e);
5457 							break;
5458 						}
5459 					case K_ROLES:
5460 					{
5461 						char *name = "multiRolesTarget";
5462 						int qindex;
5463 						tagEntryInfo *qe;
5464 
5465 						initTagEntry (&e, name, i);
5466 						assignRole(&e, R_ROLES_KIND_A_ROLE);
5467 						assignRole(&e, R_ROLES_KIND_C_ROLE);
5468 						assignRole(&e, R_ROLES_KIND_D_ROLE);
5469 						qindex = makeTagEntry (&e);
5470 						qe = getEntryInCorkQueue (qindex);
5471 						if (qe)
5472 							assignRole(qe, R_ROLES_KIND_B_ROLE);
5473 						break;
5474 					}
5475 					case K_ROLES_DISABLED:
5476 					{
5477 						char *name = "multiRolesDisabledTarget";
5478 
5479 						initRefTagEntry (&e, name, i, R_ROLES_DISABLED_KIND_A_ROLE);
5480 						makeTagEntry (&e);
5481 						initRefTagEntry (&e, name, i, R_ROLES_DISABLED_KIND_B_ROLE);
5482 						makeTagEntry (&e);
5483 						break;
5484 					}
5485 					case K_FIELD_TESTING:
5486 					{
5487 						char c = 'a';
5488 						char name []= {'\0', 't', 'a', 'g', '\0' };
5489 
5490 						name [0] = c++;
5491 						initTagEntry (&e, name, i);
5492 						attachParserField (&e, false,
5493 										   CTSTFields[F_BOOLEAN_FIELD].ftype, "");
5494 						makeTagEntry (&e);
5495 
5496 						name [0] = c++;
5497 						initTagEntry (&e, name, i);
5498 						makeTagEntry (&e);
5499 
5500 						name [0] = c++;
5501 						initTagEntry (&e, name, i);
5502 						attachParserField (&e, false,
5503 										   CTSTFields[F_BOOLEAN_AND_STRING_FIELD].ftype, "val");
5504 						makeTagEntry (&e);
5505 
5506 						name [0] = c++;
5507 						initTagEntry (&e, name, i);
5508 						attachParserField (&e, false,
5509 										   CTSTFields[F_BOOLEAN_AND_STRING_FIELD].ftype, "");
5510 						makeTagEntry (&e);
5511 
5512 						break;
5513 					}
5514 					case K_TRIGGER_NOTICE:
5515 						notice ("notice output for testing: %s", CTST_Kinds [i].name);
5516 						break;
5517 				}
5518 			}
5519 	}
5520 
5521 	TRACE_LEAVE();
5522 }
5523 
/*
 * initStats callback of the self-test parser: turn on the counting done in
 * createCTSTTags().  The language argument is not used.
 */
static void initStatsCTST (langType lang CTAGS_ATTR_UNUSED)
{
	const bool gather = true;

	CTST_GatherStats = gather;
}
5528 
/*
 * printStats callback of the self-test parser: report the value of
 * CTST_num_handled_char on stderr.  The language argument is not used.
 */
static void printStatsCTST (langType lang CTAGS_ATTR_UNUSED)
{
	const int handled = CTST_num_handled_char;

	fprintf (stderr, "The number of handled chars: %d\n", handled);
}
5534 
CTagsSelfTestParser(void)5535 static parserDefinition *CTagsSelfTestParser (void)
5536 {
5537 	static const char *const extensions[] = { NULL };
5538 	parserDefinition *const def = parserNew (SELF_TEST_PARSER);
5539 	def->extensions = extensions;
5540 	def->kindTable = CTST_Kinds;
5541 	def->kindCount = KIND_COUNT;
5542 	def->parser = createCTSTTags;
5543 	def->invisible = true;
5544 	def->useMemoryStreamInput = true;
5545 	def->useCork = CORK_QUEUE;
5546 	def->initStats = initStatsCTST;
5547 	def->printStats = printStatsCTST;
5548 	def->fieldTable = CTSTFields;
5549 	def->fieldCount = ARRAY_SIZE (CTSTFields);
5550 
5551 	return def;
5552 }
5553