1 /*
2 * Copyright (c) 2015, Dmitri Tikhonov
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * selectors.c -- routines for selecting a language
8 */
9
10 #include "general.h"
11
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
15
16 #include "debug.h"
17 #include "parse_p.h"
18 #include "options.h"
19 #include "selectors.h"
20 #include "vstring.h"
21 #include "mio.h"
22
/* Language names used by the selectors in this file.  They are either
 * returned to the caller as the chosen language or passed to
 * getNamedLanguage() to look up the corresponding language id. */

/* Returned by line tasters when a line gives no hint. */
static const char *TR_UNKNOWN = NULL;
static const char *TR_PERL5 = "Perl";
static const char *TR_PERL6 = "Perl6";

static const char *TR_OBJC = "ObjectiveC";
static const char *TR_MATLAB = "MatLab";

static const char *TR_CPP = "C++";

static const char *TR_R = "R";
static const char *TR_ASM = "Asm";

static const char *TR_REXX = "REXX";
static const char *TR_DOSBATCH = "DosBatch";

static const char *TR_LISP = "Lisp";
static const char *TR_LEX = "LEX";
40
/* Non-zero when the string LINE begins with the string PREFIX.
 * PREFIX is expanded twice, so it must not have side effects
 * (in this file it is always a string literal). */
#define startsWith(line,prefix) \
	(strncmp ((line), (prefix), strlen (prefix)) == 0)
43
/* Feeds INPUT to LINETASTER chunk by chunk until the taster names a language.
 *
 * input:       stream to read with mio_gets().
 * lineTaster:  callback receiving the text just read and USERDATA; it returns
 *              a language name, or NULL when the text is inconclusive.
 * defaultLang: value returned when the input is exhausted without a verdict.
 * userData:    passed through to LINETASTER untouched.
 *
 * Text is read into a fixed 0x800-byte buffer, so a longer line reaches the
 * taster in several pieces.
 */
static const char *selectByLines (MIO *input,
				  const char* (* lineTaster) (const char *, void *),
				  const char* defaultLang,
				  void *userData)
{
	char buf[0x800];
	const char *verdict = NULL;

	/* Stop reading as soon as the taster has made up its mind. */
	while (verdict == NULL && mio_gets (input, buf, sizeof (buf)))
		verdict = lineTaster (buf, userData);

	if (verdict == NULL)
		return defaultLang;
	return verdict;
}
57
58 /* Returns "Perl" or "Perl6" or NULL if it does not taste like anything */
59 static const char *
tastePerlLine(const char * line,void * data CTAGS_ATTR_UNUSED)60 tastePerlLine (const char *line, void *data CTAGS_ATTR_UNUSED)
61 {
62 while (isspace(*line))
63 ++line;
64 #define STRLEN(s) (sizeof(s) - 1)
65 /* Assume the first character has been checked: */
66 #define CHECK_PART(line, s) ( \
67 0 == strncmp((line) + 1, (s) + 1, STRLEN(s) - 1) && \
68 !isalnum((line)[STRLEN(s)]))
69 switch (line[0]) {
70 case '#': /* TODO: taste modeline */
71 case '\0':
72 return TR_UNKNOWN;
73 case '=':
74 if (CHECK_PART(line, "=head1"))
75 return TR_PERL5;
76 if (CHECK_PART(line, "=head2"))
77 return TR_PERL5;
78 break;
79 case 'c':
80 if (CHECK_PART(line, "class"))
81 return TR_PERL6;
82 break;
83 case 'g':
84 if (CHECK_PART(line, "grammar"))
85 return TR_PERL6;
86 break;
87 case 'm':
88 /* TODO: my may be many things: class, role, etc. */
89 if (CHECK_PART(line, "my class"))
90 return TR_PERL6;
91 if (CHECK_PART(line, "method"))
92 return TR_PERL6;
93 if (CHECK_PART(line, "multi"))
94 return TR_PERL6;
95 break;
96 case 'n':
97 if (CHECK_PART(line, "need"))
98 return TR_PERL6;
99 break;
100 case 'p':
101 if (CHECK_PART(line, "package"))
102 return TR_PERL5;
103 break;
104 case 'r':
105 if (CHECK_PART(line, "role"))
106 return TR_PERL6;
107 if (CHECK_PART(line, "require 5"))
108 return TR_PERL5;
109 break;
110 case 'u':
111 if (CHECK_PART(line, "unit"))
112 return TR_PERL6;
113 if (CHECK_PART(line, "use v6"))
114 return TR_PERL6;
115 if (CHECK_PART(line, "use nqp"))
116 return TR_PERL5;
117 if (CHECK_PART(line, "use warnings"))
118 return TR_PERL5;
119 break;
120 }
121 #undef CHECK_PART
122 return TR_UNKNOWN;
123 }
124
125 const char *
selectByPickingPerlVersion(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)126 selectByPickingPerlVersion (MIO *input,
127 langType *candidates CTAGS_ATTR_UNUSED,
128 unsigned int nCandidates CTAGS_ATTR_UNUSED)
129 {
130 /* Default to Perl 5 */
131 return selectByLines (input, tastePerlLine, TR_PERL5, NULL);
132 }
133
134 static const char *
tasteObjectiveCOrMatLabLines(const char * line,void * data CTAGS_ATTR_UNUSED)135 tasteObjectiveCOrMatLabLines (const char *line, void *data CTAGS_ATTR_UNUSED)
136 {
137 if (startsWith (line, "% ")
138 || startsWith (line, "%{"))
139 return TR_MATLAB;
140 else if (startsWith (line, "// ")
141 || startsWith (line, "/* "))
142 return TR_OBJC;
143 else if (startsWith (line, "#include")
144 || startsWith (line, "#import")
145 || startsWith (line, "#define ")
146 || startsWith (line, "#ifdef "))
147 return TR_OBJC;
148 else if (startsWith (line, "@interface ")
149 || startsWith (line, "@implementation ")
150 || startsWith (line, "@protocol "))
151 return TR_OBJC;
152 else if (startsWith (line, "struct ")
153 || startsWith (line, "union ")
154 || startsWith (line, "typedef "))
155 return TR_OBJC;
156 else {
157 if (startsWith (line, "function ")) {
158 const char *p = line + strlen ("function ");
159 while (isspace(*p))
160 p++;
161 if (*p != '\0' && *p != '(')
162 return TR_MATLAB;
163 }
164 }
165 return NULL;
166 }
167
168 const char *
selectByObjectiveCAndMatLabKeywords(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)169 selectByObjectiveCAndMatLabKeywords (MIO * input,
170 langType *candidates CTAGS_ATTR_UNUSED,
171 unsigned int nCandidates CTAGS_ATTR_UNUSED)
172 {
173 return selectByLines (input, tasteObjectiveCOrMatLabLines,
174 NULL, NULL);
175 }
176
177 static const char *
tasteObjectiveC(const char * line,void * data CTAGS_ATTR_UNUSED)178 tasteObjectiveC (const char *line, void *data CTAGS_ATTR_UNUSED)
179 {
180 if (startsWith (line, "#import")
181 || startsWith (line, "@interface ")
182 || startsWith (line, "@implementation ")
183 || startsWith (line, "@protocol "))
184 return TR_OBJC;
185 return NULL;
186 }
187
188 const char *
selectByObjectiveCKeywords(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)189 selectByObjectiveCKeywords (MIO * input,
190 langType *candidates CTAGS_ATTR_UNUSED,
191 unsigned int nCandidates CTAGS_ATTR_UNUSED)
192 {
193 /* TODO: Ideally opening input should be delayed til
194 enable/disable based selection is done. */
195
196 static langType objc = LANG_IGNORE;
197 static langType cpp = LANG_IGNORE;
198
199 if (objc == LANG_IGNORE)
200 objc = getNamedLanguage (TR_OBJC, 0);
201
202 if (cpp == LANG_IGNORE)
203 cpp = getNamedLanguage (TR_CPP, 0);
204
205 Assert (0 <= objc);
206 Assert (0 <= cpp);
207
208 if (! isLanguageEnabled (objc))
209 return TR_CPP;
210 else if (! isLanguageEnabled (cpp))
211 return TR_OBJC;
212
213 return selectByLines (input, tasteObjectiveC, TR_CPP,
214 NULL);
215 }
216
217 static const char *
tasteR(const char * line,void * data CTAGS_ATTR_UNUSED)218 tasteR (const char *line, void *data CTAGS_ATTR_UNUSED)
219 {
220 /* As far as reading test cases in GNU assembler,
221 assembly language for d10v and d30v processors
222 uses "<-" as part its syntax. I cannot find better
223 hint for distinguishing between the assembly
224 language and R.
225 ----
226 binutils-2.15.92.0.2/gas/testsuite/gas/d30v/mul.s */
227 return strstr (line, "<-")? TR_R: NULL;
228 }
229
230 const char *
selectByArrowOfR(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)231 selectByArrowOfR (MIO *input,
232 langType *candidates CTAGS_ATTR_UNUSED,
233 unsigned int nCandidates CTAGS_ATTR_UNUSED)
234 {
235 /* TODO: Ideally opening input should be delayed till
236 enable/disable based selection is done. */
237
238 static langType R = LANG_IGNORE;
239 static langType Asm = LANG_IGNORE;
240
241 if (R == LANG_IGNORE)
242 R = getNamedLanguage (TR_R, 0);
243
244 if (Asm == LANG_IGNORE)
245 Asm = getNamedLanguage (TR_ASM, 0);
246
247 Assert (0 <= R);
248 Assert (0 <= Asm);
249
250 if (! isLanguageEnabled (R))
251 return TR_ASM;
252 else if (! isLanguageEnabled (Asm))
253 return TR_R;
254
255 return selectByLines (input, tasteR, NULL,
256 NULL);
257 }
258
259 static const char *
tasteREXXOrDosBatch(const char * line,void * data)260 tasteREXXOrDosBatch (const char *line, void *data)
261 {
262 bool * in_rexx_comment = data;
263
264 if (startsWith (line, ":"))
265 return TR_DOSBATCH;
266 else if (*in_rexx_comment
267 && strstr (line, "*/"))
268 return TR_REXX;
269 else if (strstr (line, "/*"))
270 {
271 *in_rexx_comment = true;
272 return NULL;
273 }
274 else
275 return NULL;
276 }
277
278 const char *
selectByRexxCommentAndDosbatchLabelPrefix(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)279 selectByRexxCommentAndDosbatchLabelPrefix (MIO *input,
280 langType *candidates CTAGS_ATTR_UNUSED,
281 unsigned int nCandidates CTAGS_ATTR_UNUSED)
282 {
283 /* TODO: Ideally opening input should be delayed till
284 enable/disable based selection is done. */
285
286 static langType rexx = LANG_IGNORE;
287 static langType dosbatch = LANG_IGNORE;
288 bool in_rexx_comment = false;
289
290 if (rexx == LANG_IGNORE)
291 rexx = getNamedLanguage (TR_R, 0);
292
293 if (dosbatch == LANG_IGNORE)
294 dosbatch = getNamedLanguage (TR_DOSBATCH, 0);
295
296 Assert (0 <= rexx);
297 Assert (0 <= dosbatch);
298
299 if (! isLanguageEnabled (rexx))
300 return TR_DOSBATCH;
301 else if (! isLanguageEnabled (dosbatch))
302 return TR_REXX;
303
304 return selectByLines (input, tasteREXXOrDosBatch,
305 NULL, &in_rexx_comment);
306 }
307
308 static const char *
tasteLispOrLEXLines(const char * line,void * data CTAGS_ATTR_UNUSED)309 tasteLispOrLEXLines (const char *line, void *data CTAGS_ATTR_UNUSED)
310 {
311 if (strcmp(line, "%{\n") == 0
312 || strcmp(line, "%top{\n") == 0
313 || strcmp(line, "%%\n") == 0)
314 return TR_LEX;
315 return TR_UNKNOWN;
316 }
317
318 const char *
selectLispOrLEXByLEXMarker(MIO * input,langType * candidates CTAGS_ATTR_UNUSED,unsigned int nCandidates CTAGS_ATTR_UNUSED)319 selectLispOrLEXByLEXMarker (MIO *input,
320 langType *candidates CTAGS_ATTR_UNUSED,
321 unsigned int nCandidates CTAGS_ATTR_UNUSED)
322 {
323 return selectByLines (input, tasteLispOrLEXLines, TR_LISP, NULL);
324 }
325
326 #ifdef HAVE_LIBXML
327
328 #include <libxml/xpath.h>
329 #include <libxml/tree.h>
330
/* Error callback that deliberately does nothing.  xmlParseMIO() installs it
 * with xmlSetGenericErrorFunc(), so whatever is reported through that hook
 * is discarded. */
static void
suppressWarning (void *ctx CTAGS_ATTR_UNUSED,
		 const char *msg CTAGS_ATTR_UNUSED,
		 ...)
{
}
334
335 static xmlDocPtr
xmlParseMIO(MIO * input)336 xmlParseMIO (MIO *input)
337 {
338 const unsigned char *buf;
339 size_t len;
340
341 buf = mio_memory_get_data (input, &len);
342 Assert (buf);
343
344 xmlSetGenericErrorFunc (NULL, suppressWarning);
345 xmlLineNumbersDefault (1);
346 return xmlParseMemory((const char *)buf, len);
347 }
348
349 static bool
matchXpathFileSpec(xmlDocPtr doc,xpathFileSpec * spec)350 matchXpathFileSpec (xmlDocPtr doc, xpathFileSpec *spec)
351 {
352 if (spec->rootElementName)
353 {
354 if (*spec->rootElementName == '\0')
355 {
356 /* The statement is just for keeping code symmetric.
357 Meaningless examination: a root element is
358 always there.*/
359 if (doc->children && doc->children->name)
360 return false;
361 }
362 else if (! (doc->children
363 && doc->children->name
364 && (strcmp (spec->rootElementName, (char *)doc->children->name) == 0)))
365 return false;
366 else
367 verbose (" Xml[rootElementName]== %s\n",
368 spec->rootElementName);
369 }
370
371 if (spec->nameInDTD)
372 {
373 if (*spec->nameInDTD == '\0')
374 {
375 if (doc->intSubset && doc->intSubset->name)
376 return false;
377 }
378 else if (! (doc->intSubset
379 && doc->intSubset->name
380 && (strcmp (spec->nameInDTD, (char *)doc->intSubset->name) == 0)))
381 return false;
382 else
383 verbose (" Xml[nameInDTD]== %s\n",
384 spec->nameInDTD);
385 }
386
387 if (spec->externalID)
388 {
389 if (*spec->externalID == '\0')
390 {
391 if (doc->intSubset && doc->intSubset->ExternalID)
392 return false;
393 }
394 else if (! (doc->intSubset
395 && doc->intSubset->ExternalID
396 && (strcmp (spec->externalID, (char *)doc->intSubset->ExternalID) == 0)))
397 return false;
398 else
399 verbose (" Xml[externalID]== %s\n",
400 spec->externalID);
401
402 }
403
404 if (spec->systemID)
405 {
406 if (*spec->systemID == '\0')
407 {
408 if (doc->intSubset && doc->intSubset->SystemID)
409 return false;
410 }
411 else if (! (doc->intSubset
412 && doc->intSubset->SystemID
413 && (strcmp (spec->systemID, (char *)doc->intSubset->SystemID) == 0)))
414 return false;
415 else
416 verbose (" Xml[systemID]== %s\n",
417 spec->systemID);
418 }
419
420 if (spec->rootNSPrefix)
421 {
422 if (*spec->rootNSPrefix == '\0')
423 {
424 if (doc->children && doc->children->ns && doc->children->ns->prefix)
425 return false;
426 }
427 else if (! (doc->children
428 && doc->children->ns
429 && doc->children->ns->prefix
430 && (strcmp (spec->rootNSPrefix, (char *)doc->children->ns->prefix))))
431 return false;
432 else
433 verbose (" Xml[rootNSPrefix]== %s\n",
434 spec->rootNSPrefix);
435 }
436
437 if (spec->rootNSHref)
438 {
439 if (*spec->rootNSHref == '\0')
440 {
441 if (doc->children && doc->children->ns && doc->children->ns->href)
442 return false;
443 }
444 else if (! (doc->children
445 && doc->children->ns
446 && doc->children->ns->href
447 && (strcmp (spec->rootNSHref, (char *)doc->children->ns->href) == 0)))
448 return false;
449 else
450 verbose (" Xml[rootNSHref]== %s\n",
451 spec->rootNSHref);
452 }
453 return true;
454 }
455
456 static const char *
selectParserForXmlDoc(xmlDocPtr doc,langType * candidates,unsigned int nCandidates)457 selectParserForXmlDoc (xmlDocPtr doc,
458 langType *candidates,
459 unsigned int nCandidates)
460 {
461
462 unsigned int lang_index;
463 bool xml_parser_is_in_candidate = false;;
464
465 verbose (" Xml[rootElementName]: %s\n",
466 (doc->children && doc->children->name)
467 ? ((char *)doc->children->name): "-");
468 verbose (" Xml[nameInDTD]: %s\n",
469 (doc->intSubset && doc->intSubset->name)
470 ? ((char *)doc->intSubset->name): "-");
471 verbose (" Xml[externalID]: %s\n",
472 (doc->intSubset && doc->intSubset->ExternalID)
473 ? ((char *)doc->intSubset->ExternalID): "-");
474 verbose (" Xml[systemID]: %s\n",
475 (doc->intSubset && doc->intSubset->SystemID)
476 ? ((char *)doc->intSubset->SystemID): "-");
477 verbose (" Xml[rootNSPrefix]: %s\n",
478 (doc->children && doc->children->ns && doc->children->ns->prefix)
479 ? ((char *)doc->children->ns->prefix): "-");
480 verbose (" Xml[rootNSHref]: %s\n",
481 (doc->children && doc->children->ns && doc->children->ns->href)
482 ? ((char *)doc->children->ns->href): "-");
483
484 for (lang_index = 0; lang_index < nCandidates; lang_index++)
485 {
486 unsigned int spec_index;
487 xpathFileSpec* spec;
488 unsigned int spec_count;
489
490 verbose (" lxpath examines %s\n", getLanguageName (candidates[lang_index]));
491
492 spec_count = getXpathFileSpecCount (candidates[lang_index]);
493 for (spec_index = 0; spec_index < spec_count; spec_index++)
494 {
495 spec = getXpathFileSpec (candidates[lang_index], spec_index);
496 if (matchXpathFileSpec (doc, spec))
497 return getLanguageName (candidates[lang_index]);
498 }
499
500 if (strcmp (getLanguageName (candidates[lang_index]), "XML") == 0)
501 xml_parser_is_in_candidate = true;
502 }
503
504 if (xml_parser_is_in_candidate)
505 {
506 verbose (" Use generic XML parser as fallback\n");
507 return "XML";
508 }
509
510 return NULL;
511 }
512
513 const char *
selectByXpathFileSpec(MIO * input,langType * candidates,unsigned int nCandidates)514 selectByXpathFileSpec (MIO *input,
515 langType *candidates,
516 unsigned int nCandidates)
517 {
518 xmlDocPtr doc;
519 const char *r = NULL;
520
521 doc = xmlParseMIO (input);
522 if (doc == NULL)
523 return NULL;
524
525 r = selectParserForXmlDoc (doc, candidates, nCandidates);
526
527 if (r == NULL)
528 xmlFreeDoc (doc);
529 else
530 mio_attach_user_data (input,
531 doc,(MIODestroyNotify)xmlFreeDoc);
532
533 return r;
534 }
535
536 #else
537
538 const char *
selectByXpathFileSpec(MIO * input,langType * candidates,unsigned int nCandidates)539 selectByXpathFileSpec (MIO *input,
540 langType *candidates,
541 unsigned int nCandidates)
542 {
543 return NULL;
544 }
545
546 #endif
547