xref: /Universal-ctags/main/selectors.c (revision d325d3e6cf56987737f10b7c02f7aa9821681a22)
1 /*
2  * Copyright (c) 2015, Dmitri Tikhonov
3  *
4  * This source code is released for free distribution under the terms of the
5  * GNU General Public License version 2 or (at your option) any later version.
6  *
7  * selectors.c -- routines for selecting a language
8  */
9 
10 #include "general.h"
11 
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
15 
16 #include "debug.h"
17 #include "parse_p.h"
18 #include "options.h"
19 #include "selectors.h"
20 #include "vstring.h"
21 #include "mio.h"
22 
/* Language names handed back by the selectors in this file.  Several of
   them are also passed to getNamedLanguage() below.  TR_UNKNOWN (NULL)
   means "not conclusive".  The pointers themselves are const so that a
   name cannot be re-targeted by accident. */
static const char *const TR_UNKNOWN = NULL;
static const char *const TR_PERL5   = "Perl";
static const char *const TR_PERL6   = "Perl6";

static const char *const TR_OBJC    = "ObjectiveC";
static const char *const TR_MATLAB  = "MatLab";

static const char *const TR_CPP     = "C++";

static const char *const TR_R       = "R";
static const char *const TR_ASM     = "Asm";

static const char *const TR_REXX     = "REXX";
static const char *const TR_DOSBATCH = "DosBatch";

static const char *const TR_LISP     = "Lisp";
static const char *const TR_LEX      = "LEX";
40 
/* Non-zero when the string LINE begins with the string PREFIX.
   PREFIX is evaluated twice. */
#define startsWith(line,prefix) \
  (strncmp((line), (prefix), strlen(prefix)) == 0)
43 
/*
 * Read INPUT chunk by chunk with mio_gets() and hand every chunk to
 * LINETASTER together with USERDATA.  The first non-NULL answer of the
 * taster is returned; when the input runs out without an answer,
 * DEFAULTLANG is returned instead.
 *
 * NOTE(review): mio_gets() is presumably fgets-like, so a line longer than
 * the buffer would be tasted in several pieces -- confirm.
 */
static const char *selectByLines (MIO *input,
				  const char* (* lineTaster) (const char *, void *),
				  const char* defaultLang,
				  void *userData)
{
    char buffer[0x800];
    const char *verdict = NULL;

    while (verdict == NULL && mio_gets (input, buffer, sizeof (buffer)))
	verdict = lineTaster (buffer, userData);

    return verdict ? verdict : defaultLang;
}
57 
58 /* Returns "Perl" or "Perl6" or NULL if it does not taste like anything */
59 static const char *
tastePerlLine(const char * line,void * data CTAGS_ATTR_UNUSED)60 tastePerlLine (const char *line, void *data CTAGS_ATTR_UNUSED)
61 {
62     while (isspace(*line))
63         ++line;
64 #define STRLEN(s) (sizeof(s) - 1)
65 /* Assume the first character has been checked: */
66 #define CHECK_PART(line, s) (    \
67     0 == strncmp((line) + 1, (s) + 1, STRLEN(s) - 1) && \
68     !isalnum((line)[STRLEN(s)]))
69     switch (line[0]) {
70         case '#':       /* TODO: taste modeline */
71         case '\0':
72             return TR_UNKNOWN;
73         case '=':
74             if (CHECK_PART(line, "=head1"))
75                 return TR_PERL5;
76             if (CHECK_PART(line, "=head2"))
77                 return TR_PERL5;
78             break;
79         case 'c':
80             if (CHECK_PART(line, "class"))
81                 return TR_PERL6;
82             break;
83         case 'g':
84             if (CHECK_PART(line, "grammar"))
85                 return TR_PERL6;
86             break;
87         case 'm':
88             /* TODO: my may be many things: class, role, etc. */
89             if (CHECK_PART(line, "my class"))
90                 return TR_PERL6;
91             if (CHECK_PART(line, "method"))
92                 return TR_PERL6;
93             if (CHECK_PART(line, "multi"))
94                 return TR_PERL6;
95             break;
96         case 'n':
97             if (CHECK_PART(line, "need"))
98                 return TR_PERL6;
99             break;
100         case 'p':
101             if (CHECK_PART(line, "package"))
102                 return TR_PERL5;
103             break;
104         case 'r':
105             if (CHECK_PART(line, "role"))
106                 return TR_PERL6;
107             if (CHECK_PART(line, "require 5"))
108                 return TR_PERL5;
109             break;
110         case 'u':
111             if (CHECK_PART(line, "unit"))
112                 return TR_PERL6;
113             if (CHECK_PART(line, "use v6"))
114                 return TR_PERL6;
115             if (CHECK_PART(line, "use nqp"))
116                 return TR_PERL5;
117             if (CHECK_PART(line, "use warnings"))
118                 return TR_PERL5;
119             break;
120     }
121 #undef CHECK_PART
122     return TR_UNKNOWN;
123 }
124 
125 const char *
selectByPickingPerlVersion(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)126 selectByPickingPerlVersion (MIO *input,
127 							langType *candidates CTAGS_ATTR_UNUSED,
128 							unsigned int nCandidates CTAGS_ATTR_UNUSED)
129 {
130     /* Default to Perl 5 */
131     return selectByLines (input, tastePerlLine, TR_PERL5, NULL);
132 }
133 
134 static const char *
tasteObjectiveCOrMatLabLines(const char * line,void * data CTAGS_ATTR_UNUSED)135 tasteObjectiveCOrMatLabLines (const char *line, void *data CTAGS_ATTR_UNUSED)
136 {
137     if (startsWith (line, "% ")
138 	|| startsWith (line, "%{"))
139 	return TR_MATLAB;
140     else if (startsWith (line, "// ")
141 	     || startsWith (line, "/* "))
142 	return TR_OBJC;
143     else if (startsWith (line, "#include")
144 	     || startsWith (line, "#import")
145 	     || startsWith (line, "#define ")
146 	     || startsWith (line, "#ifdef "))
147 	return TR_OBJC;
148     else if (startsWith (line, "@interface ")
149 	     || startsWith (line, "@implementation ")
150 	     || startsWith (line, "@protocol "))
151 	return TR_OBJC;
152     else if (startsWith (line, "struct ")
153 	     || startsWith (line, "union ")
154 	     || startsWith (line, "typedef "))
155 	return TR_OBJC;
156     else {
157 	if (startsWith (line, "function ")) {
158 	    const char *p = line + strlen ("function ");
159 	    while (isspace(*p))
160 		p++;
161 	    if (*p != '\0' && *p != '(')
162 		return TR_MATLAB;
163 	}
164     }
165     return NULL;
166 }
167 
168 const char *
selectByObjectiveCAndMatLabKeywords(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)169 selectByObjectiveCAndMatLabKeywords (MIO * input,
170 									 langType *candidates CTAGS_ATTR_UNUSED,
171 									 unsigned int nCandidates CTAGS_ATTR_UNUSED)
172 {
173     return selectByLines (input, tasteObjectiveCOrMatLabLines,
174 			  NULL, NULL);
175 }
176 
177 static const char *
tasteObjectiveC(const char * line,void * data CTAGS_ATTR_UNUSED)178 tasteObjectiveC (const char *line, void *data CTAGS_ATTR_UNUSED)
179 {
180     if (startsWith (line, "#import")
181 	|| startsWith (line, "@interface ")
182 	|| startsWith (line, "@implementation ")
183 	|| startsWith (line, "@protocol "))
184 	return TR_OBJC;
185     return NULL;
186 }
187 
188 const char *
selectByObjectiveCKeywords(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)189 selectByObjectiveCKeywords (MIO * input,
190 							langType *candidates CTAGS_ATTR_UNUSED,
191 							unsigned int nCandidates CTAGS_ATTR_UNUSED)
192 {
193     /* TODO: Ideally opening input should be delayed til
194        enable/disable based selection is done. */
195 
196     static langType objc = LANG_IGNORE;
197     static langType cpp = LANG_IGNORE;
198 
199     if (objc == LANG_IGNORE)
200 	objc = getNamedLanguage (TR_OBJC, 0);
201 
202     if (cpp == LANG_IGNORE)
203 	cpp = getNamedLanguage (TR_CPP, 0);
204 
205     Assert (0 <= objc);
206     Assert (0 <= cpp);
207 
208     if (! isLanguageEnabled (objc))
209 	return TR_CPP;
210     else if (! isLanguageEnabled (cpp))
211 	return TR_OBJC;
212 
213     return selectByLines (input, tasteObjectiveC, TR_CPP,
214 			  NULL);
215 }
216 
217 static const char *
tasteR(const char * line,void * data CTAGS_ATTR_UNUSED)218 tasteR (const char *line, void *data CTAGS_ATTR_UNUSED)
219 {
220 	/* As far as reading test cases in GNU assembler,
221 	   assembly language for d10v and d30v processors
222 	   uses "<-" as part its syntax. I cannot find better
223 	   hint for distinguishing between the assembly
224 	   language and R.
225 	   ----
226 	   binutils-2.15.92.0.2/gas/testsuite/gas/d30v/mul.s */
227 	return strstr (line, "<-")? TR_R: NULL;
228 }
229 
230 const char *
selectByArrowOfR(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)231 selectByArrowOfR (MIO *input,
232 				  langType *candidates CTAGS_ATTR_UNUSED,
233 				  unsigned int nCandidates CTAGS_ATTR_UNUSED)
234 {
235     /* TODO: Ideally opening input should be delayed till
236        enable/disable based selection is done. */
237 
238     static langType R   = LANG_IGNORE;
239     static langType Asm = LANG_IGNORE;
240 
241     if (R == LANG_IGNORE)
242 	    R = getNamedLanguage (TR_R, 0);
243 
244     if (Asm == LANG_IGNORE)
245 	    Asm = getNamedLanguage (TR_ASM, 0);
246 
247     Assert (0 <= R);
248     Assert (0 <= Asm);
249 
250     if (! isLanguageEnabled (R))
251 	    return TR_ASM;
252     else if (! isLanguageEnabled (Asm))
253 	    return TR_R;
254 
255     return selectByLines (input, tasteR, NULL,
256 			  NULL);
257 }
258 
259 static const char *
tasteREXXOrDosBatch(const char * line,void * data)260 tasteREXXOrDosBatch (const char *line, void *data)
261 {
262 	bool * in_rexx_comment = data;
263 
264 	if (startsWith (line, ":"))
265 		return TR_DOSBATCH;
266 	else if (*in_rexx_comment
267 		 && strstr (line, "*/"))
268 		return TR_REXX;
269 	else if (strstr (line, "/*"))
270 	{
271 		*in_rexx_comment = true;
272 		return NULL;
273 	}
274 	else
275 		return NULL;
276 }
277 
278 const char *
selectByRexxCommentAndDosbatchLabelPrefix(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)279 selectByRexxCommentAndDosbatchLabelPrefix (MIO *input,
280 										   langType *candidates CTAGS_ATTR_UNUSED,
281 										   unsigned int nCandidates CTAGS_ATTR_UNUSED)
282 {
283     /* TODO: Ideally opening input should be delayed till
284        enable/disable based selection is done. */
285 
286     static langType rexx     = LANG_IGNORE;
287     static langType dosbatch = LANG_IGNORE;
288     bool in_rexx_comment = false;
289 
290     if (rexx == LANG_IGNORE)
291 	    rexx = getNamedLanguage (TR_R, 0);
292 
293     if (dosbatch == LANG_IGNORE)
294 	    dosbatch = getNamedLanguage (TR_DOSBATCH, 0);
295 
296     Assert (0 <= rexx);
297     Assert (0 <= dosbatch);
298 
299     if (! isLanguageEnabled (rexx))
300 	    return TR_DOSBATCH;
301     else if (! isLanguageEnabled (dosbatch))
302 	    return TR_REXX;
303 
304     return selectByLines (input, tasteREXXOrDosBatch,
305 			  NULL, &in_rexx_comment);
306 }
307 
308 static const char *
tasteLispOrLEXLines(const char * line,void * data CTAGS_ATTR_UNUSED)309 tasteLispOrLEXLines (const char *line, void *data CTAGS_ATTR_UNUSED)
310 {
311 	if (strcmp(line, "%{\n") == 0
312 		|| strcmp(line, "%top{\n") == 0
313 		|| strcmp(line, "%%\n") == 0)
314 		return TR_LEX;
315 	return TR_UNKNOWN;
316 }
317 
318 const char *
selectLispOrLEXByLEXMarker(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)319 selectLispOrLEXByLEXMarker (MIO *input,
320 							langType *candidates CTAGS_ATTR_UNUSED,
321 							unsigned int nCandidates CTAGS_ATTR_UNUSED)
322 {
323 	return selectByLines (input, tasteLispOrLEXLines, TR_LISP, NULL);
324 }
325 
326 #ifdef HAVE_LIBXML
327 
328 #include <libxml/xpath.h>
329 #include <libxml/tree.h>
330 
/* Error callback that discards every message.  It is installed with
   xmlSetGenericErrorFunc() in xmlParseMIO(). */
static void suppressWarning (void *ctx CTAGS_ATTR_UNUSED, const char *msg CTAGS_ATTR_UNUSED, ...)
{
	/* Intentionally empty. */
}
334 
335 static xmlDocPtr
xmlParseMIO(MIO * input)336 xmlParseMIO (MIO *input)
337 {
338 	const unsigned char *buf;
339 	size_t len;
340 
341 	buf = mio_memory_get_data (input, &len);
342 	Assert (buf);
343 
344 	xmlSetGenericErrorFunc (NULL, suppressWarning);
345 	xmlLineNumbersDefault (1);
346 	return xmlParseMemory((const char *)buf, len);
347 }
348 
349 static bool
matchXpathFileSpec(xmlDocPtr doc,xpathFileSpec * spec)350 matchXpathFileSpec (xmlDocPtr doc, xpathFileSpec *spec)
351 {
352 	if (spec->rootElementName)
353 	{
354 		if (*spec->rootElementName == '\0')
355 		{
356 			/* The statement is just for keeping code symmetric.
357 			   Meaningless examination: a root element is
358 			   always there.*/
359 			if (doc->children && doc->children->name)
360 				return false;
361 		}
362 		else if (! (doc->children
363 					&& doc->children->name
364 					&& (strcmp (spec->rootElementName, (char *)doc->children->name) == 0)))
365 			return false;
366 		else
367 			verbose ("		Xml[rootElementName]== %s\n",
368 					 spec->rootElementName);
369 	}
370 
371 	if (spec->nameInDTD)
372 	{
373 		if (*spec->nameInDTD == '\0')
374 		{
375 			if (doc->intSubset && doc->intSubset->name)
376 				return false;
377 		}
378 		else if (! (doc->intSubset
379 					&& doc->intSubset->name
380 					&& (strcmp (spec->nameInDTD, (char *)doc->intSubset->name) == 0)))
381 			return false;
382 		else
383 			verbose ("		Xml[nameInDTD]== %s\n",
384 					 spec->nameInDTD);
385 	}
386 
387 	if (spec->externalID)
388 	{
389 		if (*spec->externalID == '\0')
390 		{
391 			if (doc->intSubset && doc->intSubset->ExternalID)
392 				return false;
393 		}
394 		else if (! (doc->intSubset
395 					&& doc->intSubset->ExternalID
396 					&& (strcmp (spec->externalID, (char *)doc->intSubset->ExternalID) == 0)))
397 			return false;
398 		else
399 			verbose ("		Xml[externalID]== %s\n",
400 					 spec->externalID);
401 
402 	}
403 
404 	if (spec->systemID)
405 	{
406 		if (*spec->systemID == '\0')
407 		{
408 			if (doc->intSubset && doc->intSubset->SystemID)
409 				return false;
410 		}
411 		else if (! (doc->intSubset
412 					&& doc->intSubset->SystemID
413 					&& (strcmp (spec->systemID, (char *)doc->intSubset->SystemID) == 0)))
414 			return false;
415 		else
416 			verbose ("		Xml[systemID]== %s\n",
417 					 spec->systemID);
418 	}
419 
420 	if (spec->rootNSPrefix)
421 	{
422 		if (*spec->rootNSPrefix == '\0')
423 		{
424 			if (doc->children && doc->children->ns && doc->children->ns->prefix)
425 				return false;
426 		}
427 		else if (! (doc->children
428 					&& doc->children->ns
429 					&& doc->children->ns->prefix
430 					&& (strcmp (spec->rootNSPrefix, (char *)doc->children->ns->prefix))))
431 			return false;
432 		else
433 			verbose ("		Xml[rootNSPrefix]== %s\n",
434 					 spec->rootNSPrefix);
435 	}
436 
437 	if (spec->rootNSHref)
438 	{
439 		if (*spec->rootNSHref == '\0')
440 		{
441 			if (doc->children && doc->children->ns && doc->children->ns->href)
442 				return false;
443 		}
444 		else if (! (doc->children
445 					&& doc->children->ns
446 					&& doc->children->ns->href
447 					&& (strcmp (spec->rootNSHref, (char *)doc->children->ns->href) == 0)))
448 			return false;
449 		else
450 			verbose ("		Xml[rootNSHref]== %s\n",
451 					 spec->rootNSHref);
452 	}
453 	return true;
454 }
455 
456 static const char *
selectParserForXmlDoc(xmlDocPtr doc,langType * candidates,unsigned int nCandidates)457 selectParserForXmlDoc (xmlDocPtr doc,
458 					   langType *candidates,
459 					   unsigned int nCandidates)
460 {
461 
462 	unsigned int lang_index;
463 	bool xml_parser_is_in_candidate = false;;
464 
465 	verbose ("		Xml[rootElementName]: %s\n",
466 			 (doc->children && doc->children->name)
467 			 ? ((char *)doc->children->name): "-");
468 	verbose ("		Xml[nameInDTD]: %s\n",
469 			 (doc->intSubset && doc->intSubset->name)
470 			 ? ((char *)doc->intSubset->name): "-");
471 	verbose ("		Xml[externalID]: %s\n",
472 			 (doc->intSubset && doc->intSubset->ExternalID)
473 			 ? ((char *)doc->intSubset->ExternalID): "-");
474 	verbose ("		Xml[systemID]: %s\n",
475 			 (doc->intSubset && doc->intSubset->SystemID)
476 			 ? ((char *)doc->intSubset->SystemID): "-");
477 	verbose ("		Xml[rootNSPrefix]: %s\n",
478 			 (doc->children && doc->children->ns && doc->children->ns->prefix)
479 			 ? ((char *)doc->children->ns->prefix): "-");
480 	verbose ("		Xml[rootNSHref]: %s\n",
481 			 (doc->children && doc->children->ns && doc->children->ns->href)
482 			 ? ((char *)doc->children->ns->href): "-");
483 
484 	for (lang_index = 0; lang_index < nCandidates; lang_index++)
485 	{
486 		unsigned int spec_index;
487 		xpathFileSpec* spec;
488 		unsigned int spec_count;
489 
490 		verbose ("		lxpath examines %s\n", getLanguageName (candidates[lang_index]));
491 
492 		spec_count = getXpathFileSpecCount (candidates[lang_index]);
493 		for (spec_index = 0; spec_index < spec_count; spec_index++)
494 		{
495 			spec = getXpathFileSpec (candidates[lang_index], spec_index);
496 			if (matchXpathFileSpec (doc, spec))
497 				return getLanguageName (candidates[lang_index]);
498 		}
499 
500 		if (strcmp (getLanguageName (candidates[lang_index]), "XML") == 0)
501 			xml_parser_is_in_candidate = true;
502 	}
503 
504 	if (xml_parser_is_in_candidate)
505 	{
506 		verbose ("		Use generic XML parser as fallback\n");
507 		return "XML";
508 	}
509 
510 	return NULL;
511 }
512 
513 const char *
selectByXpathFileSpec(MIO * input,langType * candidates,unsigned int nCandidates)514 selectByXpathFileSpec (MIO *input,
515 					   langType *candidates,
516 					   unsigned int nCandidates)
517 {
518 	xmlDocPtr doc;
519 	const char *r = NULL;
520 
521 	doc = xmlParseMIO (input);
522 	if (doc == NULL)
523 		return NULL;
524 
525 	r = selectParserForXmlDoc (doc, candidates, nCandidates);
526 
527 	if (r == NULL)
528 		xmlFreeDoc (doc);
529 	else
530 		mio_attach_user_data (input,
531 				      doc,(MIODestroyNotify)xmlFreeDoc);
532 
533 	return r;
534 }
535 
536 #else
537 
538 const char *
selectByXpathFileSpec(MIO * input,langType * candidates,unsigned int nCandidates)539 selectByXpathFileSpec (MIO *input,
540 					   langType *candidates,
541 					   unsigned int nCandidates)
542 {
543 	return NULL;
544 }
545 
546 #endif
547