xref: /Universal-ctags/parsers/perl.c (revision 08bf1b999d211d0a18de5b4e5f65ad0f8dbf1c51)
1 /*
2 *   Copyright (c) 2000-2003, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains functions for generating tags for PERL language
8 *   files.
9 */
10 
11 /*
12 *   INCLUDE FILES
13 */
14 #include "general.h"  /* must always come first */
15 
16 #include <string.h>
17 
18 #include "entry.h"
19 #include "perl.h"
20 #include "promise.h"
21 #include "read.h"
22 #include "routines.h"
23 #include "selectors.h"
24 #include "subparser.h"
25 #include "vstring.h"
26 #include "xtag.h"
27 
28 #define TRACE_PERL_C 0
29 #define TRACE if (TRACE_PERL_C) printf("perl.c:%d: ", __LINE__), printf
30 
31 /*
32 *   DATA DEFINITIONS
33 */
34 typedef enum PerlKindType perlKind;
35 typedef enum PerlModuleRoleType perlModuleRole;
36 
37 static roleDefinition PerlModuleRoles [] = {
38 	{ true, "used",   "specified in `use' built-in function" },
39 	{ true, "unused", "specified in `no' built-in function" },
40 };
41 
42 typedef enum {
43 	R_HEREDOC_ENDLABEL,
44 } perlHeredocRole;
45 
46 static roleDefinition PerlHeredocRoles [] = {
47 	{ true, "endmarker", "end marker" },
48 };
49 
50 static kindDefinition PerlKinds [] = {
51 	{ true,  'c', "constant",               "constants" },
52 	{ true,  'f', "format",                 "formats" },
53 	{ true,  'l', "label",                  "labels" },
54 	{ true,  'p', "package",                "packages" },
55 	{ true,  's', "subroutine",             "subroutines" },
56 	{ false, 'd', "subroutineDeclaration",  "subroutine declarations" },
57 	{ false, 'M', "module",                 "modules",
58 	  .referenceOnly = true,  ATTACH_ROLES(PerlModuleRoles)},
59 	{ false, 'h', "heredoc", "marker for here document",
60 	  .referenceOnly = false, ATTACH_ROLES (PerlHeredocRoles) },
61 };
62 
63 struct hereDocMarker {
64 	vString *marker;
65 	bool indented;
66 	int corkIndex;
67 };
68 
69 struct hereDocMarkerManager {
70 	ptrArray *markers;
71 	size_t current;
72 };
73 
74 /*
75 *   FUNCTION DEFINITIONS
76 */
77 
notifyEnteringPod()78 static void notifyEnteringPod ()
79 {
80 	subparser *sub;
81 
82 	foreachSubparser (sub, false)
83 	{
84 		perlSubparser *perlsub = (perlSubparser *)sub;
85 		if (perlsub->enteringPodNotify)
86 		{
87 			enterSubparser (sub);
88 			perlsub->enteringPodNotify (perlsub);
89 			leaveSubparser ();
90 		}
91 	}
92 }
93 
notifyLeavingPod()94 static void notifyLeavingPod ()
95 {
96 	subparser *sub;
97 
98 	foreachSubparser (sub, false)
99 	{
100 		perlSubparser *perlsub = (perlSubparser *)sub;
101 		if (perlsub->leavingPodNotify)
102 		{
103 			enterSubparser (sub);
104 			perlsub->leavingPodNotify (perlsub);
105 			leaveSubparser ();
106 		}
107 	}
108 }
109 
notifyFindingQuotedWord(int moduleIndex,const char * qwd)110 static void notifyFindingQuotedWord (int moduleIndex,
111 									 const char *qwd)
112 {
113 	subparser *sub;
114 
115 	foreachSubparser (sub, false)
116 	{
117 		perlSubparser *perlsub = (perlSubparser *)sub;
118 		if (perlsub->findingQuotedWordNotify)
119 		{
120 			enterSubparser (sub);
121 			perlsub->findingQuotedWordNotify (perlsub,
122 											  moduleIndex,
123 											  qwd);
124 			leaveSubparser ();
125 		}
126 	}
127 }
128 
isIdentifier1(int c)129 static bool isIdentifier1 (int c)
130 {
131 	return (bool) (isalpha (c) || c == '_');
132 }
133 
isIdentifier(int c)134 static bool isIdentifier (int c)
135 {
136 	return (bool) (isalnum (c) || c == '_');
137 }
138 
isPodWord(const char * word)139 static bool isPodWord (const char *word)
140 {
141 	/* Perl POD words are three to eight characters in size.  We use this
142 	 * fact to find (or not find) the right side of the word and then
143 	 * perform comparisons, if necessary, of POD words of that size.
144 	 */
145 	size_t len;
146 	for (len = 0; len < 9; ++len)
147 		if ('\0' == word[len] || ' ' == word[len] || '\t' == word[len])
148 			break;
149 	switch (len) {
150 		case 3:
151 			return 0 == strncmp(word, "end", 3)
152 				|| 0 == strncmp(word, "for", 3)
153 				|| 0 == strncmp(word, "pod", 3);
154 		case 4:
155 			return 0 == strncmp(word, "back", 4)
156 				|| 0 == strncmp(word, "item", 4)
157 				|| 0 == strncmp(word, "over", 4);
158 		case 5:
159 			return 0 == strncmp(word, "begin", 5)
160 				|| 0 == strncmp(word, "head1", 5)
161 				|| 0 == strncmp(word, "head2", 5)
162 				|| 0 == strncmp(word, "head3", 5)
163 				|| 0 == strncmp(word, "head4", 5);
164 		case 8:
165 			return 0 == strncmp(word, "encoding", 8);
166 		default:
167 			return false;
168 	}
169 }
170 
171 /*
172  * Perl subroutine declaration may look like one of the following:
173  *
174  *  sub abc;
175  *  sub abc :attr;
176  *  sub abc (proto);
177  *  sub abc (proto) :attr;
178  *
179  * Note that there may be more than one attribute.  Attributes may
180  * have things in parentheses (they look like arguments).  Anything
181  * inside of those parentheses goes.  Prototypes may contain semi-colons.
182  * The matching end when we encounter (outside of any parentheses) either
183  * a semi-colon (that'd be a declaration) or an left curly brace
184  * (definition).
185  *
186  * This is pretty complicated parsing (plus we all know that only perl can
187  * parse Perl), so we are only promising best effort here.
188  *
189  * If we can't determine what this is (due to a file ending, for example),
190  * we will return false.
191  */
isSubroutineDeclaration(const unsigned char * cp)192 static bool isSubroutineDeclaration (const unsigned char *cp)
193 {
194 	bool attr = false;
195 	int nparens = 0;
196 
197 	do {
198 		for ( ; *cp; ++cp) {
199 SUB_DECL_SWITCH:
200 			switch (*cp) {
201 				case ':':
202 					if (nparens)
203 						break;
204 					else if (true == attr)
205 						return false;    /* Invalid attribute name */
206 					else
207 						attr = true;
208 					break;
209 				case '(':
210 					++nparens;
211 					break;
212 				case ')':
213 					--nparens;
214 					break;
215 				case ' ':
216 				case '\t':
217 					break;
218 				case ';':
219 					if (!nparens)
220 						return true;
221 				case '{':
222 					if (!nparens)
223 						return false;
224 				default:
225 					if (attr) {
226 						if (isIdentifier1(*cp)) {
227 							cp++;
228 							while (isIdentifier (*cp))
229 								cp++;
230 							attr = false;
231 							goto SUB_DECL_SWITCH; /* Instead of --cp; */
232 						} else {
233 							return false;
234 						}
235 					} else if (nparens) {
236 						break;
237 					} else {
238 						return false;
239 					}
240 			}
241 		}
242 	} while (NULL != (cp = readLineFromInputFile ()));
243 
244 	return false;
245 }
246 
247 /* `end' points to the equal sign.  Parse from right to left to get the
248  * identifier.  Assume we're dealing with something of form \s*\w+\s*=>
249  */
makeTagFromLeftSide(const char * begin,const char * end,vString * name,vString * package)250 static void makeTagFromLeftSide (const char *begin, const char *end,
251 	vString *name, vString *package)
252 {
253 	tagEntryInfo entry;
254 	const char *b, *e;
255 	if (! PerlKinds[KIND_PERL_CONSTANT].enabled)
256 		return;
257 	for (e = end - 1; e > begin && isspace(*e); --e)
258 		;
259 	if (e < begin)
260 		return;
261 	for (b = e; b >= begin && isIdentifier(*b); --b)
262 		;
263 	/* Identifier must be either beginning of line of have some whitespace
264 	 * on its left:
265 	 */
266 	if (b < begin || isspace(*b) || ',' == *b)
267 		++b;
268 	else if (b != begin)
269 		return;
270 	if (e - b + 1 <= 0)
271 		return;			/* Left side of => has an invalid identifier. */
272 	vStringClear(name);
273 	vStringNCatS(name, b, e - b + 1);
274 	initTagEntry(&entry, vStringValue(name), KIND_PERL_CONSTANT);
275 	makeTagEntry(&entry);
276 	if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && package && vStringLength(package)) {
277 		vStringClear(name);
278 		vStringCopy(name, package);
279 		vStringNCatS(name, b, e - b + 1);
280 		initTagEntry(&entry, vStringValue(name), KIND_PERL_CONSTANT);
281 		markTagExtraBit (&entry, XTAG_QUALIFIED_TAGS);
282 		makeTagEntry(&entry);
283 	}
284 }
285 
makeTagForModule(const char * name,int role)286 static int makeTagForModule (const char *name, int role)
287 {
288 	tagEntryInfo entry;
289 	initRefTagEntry(&entry, name, KIND_PERL_MODULE, role);
290 	return makeTagEntry(&entry);
291 }
292 
293 enum const_state { CONST_STATE_NEXT_LINE, CONST_STATE_HIT_END };
294 
295 /* Parse a single line, find as many NAME => VALUE pairs as we can and try
296  * to detect the end of the hashref.
297  */
parseConstantsFromLine(const char * cp,vString * name,vString * package)298 static enum const_state parseConstantsFromLine (const char *cp,
299 	vString *name, vString *package)
300 {
301 	while (1) {
302 		const size_t sz = strcspn(cp, "#}=");
303 		switch (cp[sz]) {
304 			case '=':
305 				if ('>' == cp[sz + 1])
306 					makeTagFromLeftSide(cp, cp + sz, name, package);
307 				break;
308 			case '}':	/* Assume this is the end of the hashref. */
309 				return CONST_STATE_HIT_END;
310 			case '\0':	/* End of the line. */
311 			case '#':	/* Assume this is a comment and thus end of the line. */
312 				return CONST_STATE_NEXT_LINE;
313 		}
314 		cp += sz + 1;
315 	}
316 }
317 
318 /* Parse constants declared via hash reference, like this:
319  * use constant {
320  *   A => 1,
321  *   B => 2,
322  * };
323  * The approach we take is simplistic, but it covers the vast majority of
324  * cases well.  There can be some false positives.
325  * Returns 0 if found the end of the hashref, -1 if we hit EOF
326  */
parseConstantsFromHashRef(const unsigned char * cp,vString * name,vString * package)327 static int parseConstantsFromHashRef (const unsigned char *cp,
328 	vString *name, vString *package)
329 {
330 	while (1) {
331 		enum const_state state =
332 			parseConstantsFromLine((const char *) cp, name, package);
333 		switch (state) {
334 			case CONST_STATE_NEXT_LINE:
335 				cp = readLineFromInputFile();
336 				if (cp)
337 					break;
338 				else
339 					return -1;
340 			case CONST_STATE_HIT_END:
341 				return 0;
342 		}
343 	}
344 }
345 
parseQuotedWords(const unsigned char * cp,vString * name,int moduleIndex)346 static void parseQuotedWords(const unsigned char *cp,
347 							 vString *name, int moduleIndex)
348 {
349 	unsigned char end = *cp++;
350 	switch (end)
351 	{
352 	case '[': end = ']'; break;
353 	case '(': end = ')'; break;
354 	case '{': end = '}'; break;
355 	case '<': end = '>'; break;
356 	}
357 
358 	do {
359 		while (*cp && *cp != end)
360 		{
361 			if (isspace(*cp))
362 			{
363 				notifyFindingQuotedWord (moduleIndex, vStringValue(name));
364 				vStringClear(name);
365 				cp++;
366 				continue;
367 			}
368 
369 			if (*cp == '\\')
370 			{
371 				cp++;
372 				if (*cp == '\0')
373 					break;
374 			}
375 
376 			vStringPut(name, *cp);
377 			cp++;
378 		}
379 		if (!vStringIsEmpty(name))
380 			notifyFindingQuotedWord (moduleIndex, vStringValue(name));
381 
382 		if (*cp == end)
383 			break;
384 	} while ((cp = readLineFromInputFile()) != NULL);
385 }
386 
387 /*
388  * Extract heredoc markers and skip the heredoc areas.
389  *
390  * - https://perldoc.perl.org/perlop#%3C%3CEOF
391  */
hereDocMarkerNew(bool indented)392 static struct hereDocMarker *hereDocMarkerNew (bool indented)
393 {
394 	struct hereDocMarker *marker = xMalloc(1, struct hereDocMarker);
395 
396 	marker->indented = indented;
397 	marker->marker = vStringNew();
398 	marker->corkIndex = CORK_NIL;
399 
400 	return marker;
401 }
402 
hereDocMarkerDelete(struct hereDocMarker * marker)403 static void hereDocMarkerDelete (struct hereDocMarker *marker)
404 {
405 	vStringDelete (marker->marker);
406 	eFree (marker);
407 }
408 
readHereDocMarker(unsigned char * line,vString * marker,unsigned char quote_char)409 static unsigned char *readHereDocMarker (unsigned char *line,
410 										 vString *marker,
411 										 unsigned char quote_char)
412 {
413 	unsigned char *cp = line;
414 	bool backslash = false;
415 
416 	for (cp = line; *cp != '\0'; cp++)
417 	{
418 		if (backslash)
419 		{
420 			vStringPut (marker, *cp);
421 			backslash = false;
422 			continue;
423 		}
424 
425 		if (quote_char == '"' && (*cp == '\\'))
426 		{
427 			backslash = true;
428 			continue;
429 		}
430 
431 		if (quote_char && *cp == quote_char)
432 		{
433 			cp++;
434 			break;
435 		}
436 
437 		if (!quote_char && !isIdentifier(*cp))
438 			break;
439 
440 		vStringPut (marker, *cp);
441 	}
442 
443 	return cp;
444 }
445 
collectHereDocMarkers(struct hereDocMarkerManager * mgr,const unsigned char * line)446 static void collectHereDocMarkers (struct hereDocMarkerManager *mgr,
447 								   const unsigned char *line)
448 {
449 	unsigned char *starter = (unsigned char*)strstr((char *)line, "<<");
450 	unsigned char *cp = NULL;
451 	bool indented = false;
452 	unsigned char quote_char = 0;
453 
454 	if (starter == NULL)
455 		return;
456 
457 	cp = starter + 2;
458 	while (isspace (*cp))
459 		cp++;
460 
461 	if (*cp == '\0')
462 		return;
463 
464 	/* Is shift operator? */
465 	if (isdigit (*cp))
466 	{
467 		/* Scan the rest of the string. */
468 		collectHereDocMarkers (mgr, ++cp);
469 		return;
470 	}
471 
472 	if (*cp == '~') {
473 		indented = true;
474 		cp++;
475 		if (*cp == '\0')
476 			return;
477 		while (isspace (*cp))
478 			cp++;
479 		if (*cp == '\0')
480 			return;
481 	}
482 
483 	switch (*cp)
484 	{
485 	case '\'':
486 	case '"':
487 	case '`':
488 		quote_char = *cp;
489 		/* Fall through */
490 	case '\\':
491 		cp++;
492 		if (*cp == '\0')
493 			return;
494 		break;
495 	default:
496 		break;
497 	}
498 
499 	struct hereDocMarker *marker = hereDocMarkerNew (indented);
500 	const unsigned char *last_cp = cp;
501 	cp = readHereDocMarker(cp, marker->marker, quote_char);
502 	if (vStringLength (marker->marker) > 0)
503 	{
504 		marker->corkIndex = makeSimpleTag (marker->marker,
505 										   KIND_PERL_HEREDOCMARKER);
506 		ptrArrayAdd (mgr->markers, marker);
507 	}
508 	else
509 		hereDocMarkerDelete (marker);
510 
511 	if (*cp != '\0' && cp != last_cp)
512 		collectHereDocMarkers (mgr, cp);
513 }
514 
isInHereDoc(struct hereDocMarkerManager * mgr,const unsigned char * line)515 static bool isInHereDoc (struct hereDocMarkerManager *mgr,
516 						 const unsigned char *line)
517 {
518 	if (ptrArrayCount (mgr->markers) == 0)
519 		return false;
520 
521 	const unsigned char *cp = line;
522 	struct hereDocMarker *current = ptrArrayItem (mgr->markers, mgr->current);
523 	if (current->indented)
524 	{
525 		while (isspace(*cp))
526 			cp++;
527 	}
528 	if (strncmp((const char *)cp, vStringValue (current->marker), vStringLength (current->marker)) == 0
529 		&& (cp [vStringLength (current->marker)] == '\0'
530 			|| (!isIdentifier (cp [vStringLength (current->marker)]))))
531 	{
532 		tagEntryInfo *tag = getEntryInCorkQueue (current->corkIndex);
533 		if (tag)
534 			tag->extensionFields.endLine = getInputLineNumber();
535 		mgr->current++;
536 		if (mgr->current == ptrArrayCount (mgr->markers))
537 		{
538 			ptrArrayClear (mgr->markers);
539 			mgr->current = 0;
540 		}
541 	}
542 	return true;
543 }
544 
initHereDocMarkerManager(struct hereDocMarkerManager * mgr)545 static void initHereDocMarkerManager(struct hereDocMarkerManager *mgr)
546 {
547 	mgr->markers = ptrArrayNew((ptrArrayDeleteFunc)hereDocMarkerDelete);
548 	mgr->current = 0;
549 }
550 
finiHereDocMarkerManager(struct hereDocMarkerManager * mgr)551 static void finiHereDocMarkerManager(struct hereDocMarkerManager *mgr)
552 {
553 	ptrArrayDelete (mgr->markers);
554 	mgr->markers = NULL;
555 	mgr->current = 0;
556 }
557 
558 /* Algorithm adapted from from GNU etags.
559  * Perl support by Bart Robinson <lomew@cs.utah.edu>
560  * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/
561  */
findPerlTags(void)562 static void findPerlTags (void)
563 {
564 	vString *name = vStringNew ();
565 	vString *package = NULL;
566 	bool skipPodDoc = false;
567 	const unsigned char *line;
568 	unsigned long podStart = 0UL;
569 
570 	/* A pod area can be after __END__ marker.
571 	 * Perl parser itself doesn't need to parse the area
572 	 * after the marker. Parsing the area is needed only
573 	 * if Perl parser runs Pod parser as a guest.
574 	 * This variable is set true when it is needed.
575 	 */
576 	bool parse_only_pod_area = false;
577 
578 	/* Core modules AutoLoader and SelfLoader support delayed compilation
579 	 * by allowing Perl code that follows __END__ and __DATA__ tokens,
580 	 * respectively.  When we detect that one of these modules is used
581 	 * in the file, we continue processing even after we see the
582 	 * corresponding token that would usually terminate parsing of the
583 	 * file.
584 	 */
585 	enum {
586 		RESPECT_END		= (1 << 0),
587 		RESPECT_DATA	= (1 << 1),
588 	} respect_token = RESPECT_END | RESPECT_DATA;
589 
590 	struct hereDocMarkerManager hdoc_mgr;
591 	initHereDocMarkerManager (&hdoc_mgr);
592 
593 	while ((line = readLineFromInputFile ()) != NULL)
594 	{
595 		bool spaceRequired = false;
596 		bool qualified = false;
597 		const unsigned char *cp = line;
598 		perlKind kind = KIND_PERL_NONE;
599 		tagEntryInfo e;
600 
601 		if (isInHereDoc (&hdoc_mgr, line))
602 			continue;
603 
604 		if (skipPodDoc)
605 		{
606 			if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0)
607 			{
608 				skipPodDoc = false;
609 				if (podStart != 0UL)
610 				{
611 					notifyLeavingPod ();
612 					makePromise ("Pod",
613 						     podStart, 0,
614 						     getInputLineNumber(), 0,
615 						     getSourceLineNumber());
616 					podStart = 0UL;
617 				}
618 			}
619 			continue;
620 		}
621 		else if (line [0] == '=')
622 		{
623 			skipPodDoc = isPodWord ((const char*)line + 1);
624 			if (skipPodDoc)
625 			{
626 				podStart = getSourceLineNumber ();
627 				notifyEnteringPod ();
628 			}
629 			continue;
630 		}
631 		else if (strcmp ((const char*) line, "__DATA__") == 0)
632 		{
633 			if (respect_token & RESPECT_DATA)
634 			{
635 				if (isXtagEnabled (XTAG_GUEST))
636 					parse_only_pod_area = true;
637 				else
638 					break;
639 			}
640 			else
641 				continue;
642 		}
643 		else if (strcmp ((const char*) line, "__END__") == 0)
644 		{
645 			if (respect_token & RESPECT_END)
646 			{
647 				if (isXtagEnabled (XTAG_GUEST))
648 					parse_only_pod_area = true;
649 				else
650 					break;
651 			}
652 			else
653 				continue;
654 		}
655 		else if (line [0] == '#')
656 			continue;
657 
658 		if (parse_only_pod_area)
659 			continue;
660 
661 		while (isspace (*cp))
662 			cp++;
663 
664 		collectHereDocMarkers (&hdoc_mgr, cp);
665 
666 		if (strncmp((const char*) cp, "sub", (size_t) 3) == 0)
667 		{
668 			TRACE("this looks like a sub\n");
669 			cp += 3;
670 			kind = KIND_PERL_SUBROUTINE;
671 			spaceRequired = true;
672 			qualified = true;
673 		}
674 		else if (strncmp((const char*) cp, "use", (size_t) 3) == 0)
675 		{
676 			cp += 3;
677 			if (!isspace(*cp))
678 				continue;
679 			while (*cp && isspace (*cp))
680 				++cp;
681 			if (strncmp((const char*) cp, "AutoLoader", (size_t) 10) == 0) {
682 				respect_token &= ~RESPECT_END;
683 				makeTagForModule("AutoLoader", ROLE_PERL_MODULE_USED);
684 				continue;
685 			}
686 			if (strncmp((const char*) cp, "SelfLoader", (size_t) 10) == 0) {
687 				respect_token &= ~RESPECT_DATA;
688 				makeTagForModule("SelfLoader", ROLE_PERL_MODULE_USED);
689 				continue;
690 			}
691 
692 			vString *module = NULL;
693 			while (isalnum(*cp) || *cp == ':' || *cp == '.') {
694 				if (!module)
695 					module = vStringNew();
696 				vStringPut(module, *cp);
697 				++cp;
698 			}
699 			if (!module)
700 				continue;
701 
702 			int q = makeTagForModule(vStringValue(module), ROLE_PERL_MODULE_USED);
703 			bool isConstant = (strcmp(vStringValue(module), "constant") == 0);
704 			vStringDelete(module);
705 			if (!isConstant)
706 			{
707 				while (isspace(*cp))
708 					cp++;
709 				if (strncmp("qw", (const char *)cp, 2) != 0)
710 					continue;
711 				cp += 2;
712 				while (isspace(*cp))
713 					cp++;
714 				if (*cp == '\0')
715 					continue;
716 				vStringClear (name);
717 
718 				parseQuotedWords(cp, name, q);
719 				vStringClear (name);
720 				continue;
721 			}
722 
723 			/* Skip up to the first non-space character, skipping empty
724 			 * and comment lines.
725 			 */
726 			while (isspace(*cp))
727 				cp++;
728 			while (!*cp || '#' == *cp) {
729 				cp = readLineFromInputFile ();
730 				if (!cp)
731 					goto END_MAIN_WHILE;
732 				while (isspace (*cp))
733 					cp++;
734 			}
735 			if ('{' == *cp) {
736 				++cp;
737 				if (0 == parseConstantsFromHashRef(cp, name, package)) {
738 					vStringClear(name);
739 					continue;
740 				} else
741 					goto END_MAIN_WHILE;
742 			}
743 			kind = KIND_PERL_CONSTANT;
744 			spaceRequired = false;
745 			qualified = true;
746 		}
747 		else if (strncmp((const char*) cp, "no", (size_t) 2) == 0 && isspace(cp[2]))
748 		{
749 			cp += 3;
750 			while (isspace (*cp))
751 				cp++;
752 			vString *module = NULL;
753 			while (isalnum(*cp) || *cp == ':' || *cp == '.') {
754 				if (!module)
755 					module = vStringNew();
756 				vStringPut(module, *cp);
757 				++cp;
758 			}
759 			if (module) {
760 				makeTagForModule(vStringValue(module), ROLE_PERL_MODULE_UNUSED);
761 				vStringDelete(module);
762 			}
763 			continue;
764 		}
765 		else if (strncmp((const char*) cp, "package", (size_t) 7) == 0 &&
766 				 ('\0' == cp[7] || isspace(cp[7])))
767 		{
768 			cp += 7;
769 			while (isspace (*cp))
770 				cp++;
771 			while (!*cp || '#' == *cp) {
772 				cp = readLineFromInputFile ();
773 				if (!cp)
774 					goto END_MAIN_WHILE;
775 				while (isspace (*cp))
776 					cp++;
777 			}
778 			if (package == NULL)
779 				package = vStringNew ();
780 			else
781 				vStringClear (package);
782 			const unsigned char *const first = cp;
783 			while (*cp && (int) *cp != ';'  &&  !isspace ((int) *cp))
784 			{
785 				vStringPut (package, (int) *cp);
786 				cp++;
787 			}
788 			vStringCatS (package, "::");
789 
790 			cp = first;	 /* Rewind */
791 			kind = KIND_PERL_PACKAGE;
792 			spaceRequired = false;
793 			qualified = true;
794 		}
795 		else if (strncmp((const char*) cp, "format", (size_t) 6) == 0)
796 		{
797 			cp += 6;
798 			kind = KIND_PERL_FORMAT;
799 			spaceRequired = true;
800 			qualified = true;
801 		}
802 		else
803 		{
804 			if (isIdentifier1 (*cp))
805 			{
806 				const unsigned char *p = cp;
807 				while (isIdentifier (*p))
808 					++p;
809 				while (isspace (*p))
810 					++p;
811 				if ((int) *p == ':' && (int) *(p + 1) != ':')
812 					kind = KIND_PERL_LABEL;
813 			}
814 		}
815 		if (kind != KIND_PERL_NONE)
816 		{
817 			TRACE("cp0: %s\n", (const char *) cp);
818 			if (spaceRequired && *cp && !isspace (*cp))
819 				continue;
820 
821 			TRACE("cp1: %s\n", (const char *) cp);
822 			while (isspace (*cp))
823 				cp++;
824 
825 			while (!*cp || '#' == *cp) { /* Gobble up empty lines
826 				                            and comments */
827 				cp = readLineFromInputFile ();
828 				if (!cp)
829 					goto END_MAIN_WHILE;
830 				while (isspace (*cp))
831 					cp++;
832 			}
833 
834 			while (isIdentifier (*cp) || (KIND_PERL_PACKAGE == kind && ':' == *cp))
835 			{
836 				vStringPut (name, (int) *cp);
837 				cp++;
838 			}
839 
840 			if (KIND_PERL_FORMAT == kind &&
841 				vStringLength (name) == 0 && /* cp did not advance */
842 				'=' == *cp)
843 			{
844 				/* format's name is optional.  If it's omitted, 'STDOUT'
845 				   is assumed. */
846 				vStringCatS (name, "STDOUT");
847 			}
848 
849 			TRACE("name: %s\n", vStringValue (name));
850 
851 			if (0 == vStringLength(name)) {
852 				vStringClear(name);
853 				continue;
854 			}
855 
856 			if (KIND_PERL_SUBROUTINE == kind)
857 			{
858 				/*
859 				 * isSubroutineDeclaration() may consume several lines.  So
860 				 * we record line positions.
861 				 */
862 				initTagEntry(&e, vStringValue(name), KIND_GHOST_INDEX);
863 
864 				if (true == isSubroutineDeclaration(cp)) {
865 					if (true == PerlKinds[KIND_PERL_SUBROUTINE_DECLARATION].enabled) {
866 						kind = KIND_PERL_SUBROUTINE_DECLARATION;
867 					} else {
868 						vStringClear (name);
869 						continue;
870 					}
871 				} else if (! PerlKinds[kind].enabled) {
872 					continue;
873 				}
874 
875 				e.kindIndex = kind;
876 
877 				makeTagEntry(&e);
878 
879 				if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && qualified &&
880 					package != NULL  && vStringLength (package) > 0)
881 				{
882 					vString *const qualifiedName = vStringNew ();
883 					vStringCopy (qualifiedName, package);
884 					vStringCat (qualifiedName, name);
885 					e.name = vStringValue(qualifiedName);
886 					markTagExtraBit (&e, XTAG_QUALIFIED_TAGS);
887 					makeTagEntry(&e);
888 					vStringDelete (qualifiedName);
889 				}
890 			} else if (vStringLength (name) > 0)
891 			{
892 				makeSimpleTag (name, kind);
893 				if (isXtagEnabled(XTAG_QUALIFIED_TAGS) && qualified &&
894 					KIND_PERL_PACKAGE != kind &&
895 					package != NULL  && vStringLength (package) > 0)
896 				{
897 					tagEntryInfo fqe;
898 					vString *const qualifiedName = vStringNew ();
899 					vStringCopy (qualifiedName, package);
900 					vStringCat (qualifiedName, name);
901 					initTagEntry (&fqe, vStringValue (qualifiedName), kind);
902 					markTagExtraBit (&fqe, XTAG_QUALIFIED_TAGS);
903 					makeTagEntry (&fqe);
904 					vStringDelete (qualifiedName);
905 				}
906 			}
907 			vStringClear (name);
908 		}
909 	}
910 
911 END_MAIN_WHILE:
912 	vStringDelete (name);
913 	finiHereDocMarkerManager (&hdoc_mgr);
914 	if (package != NULL)
915 		vStringDelete (package);
916 }
917 
PerlParser(void)918 extern parserDefinition* PerlParser (void)
919 {
920 	static const char *const extensions [] = { "pl", "pm", "ph", "plx", "perl", NULL };
921 	static const char *const aliases [] = {
922 		/* cperl is an Emacs' editing mode for Perl source code  */
923 		"cperl",
924 		NULL };
925 	static selectLanguage selectors [] = { selectByPickingPerlVersion,
926 					       NULL };
927 	parserDefinition* def = parserNew ("Perl");
928 	def->kindTable      = PerlKinds;
929 	def->kindCount  = ARRAY_SIZE (PerlKinds);
930 	def->extensions = extensions;
931 	def->parser     = findPerlTags;
932 	def->selectLanguage = selectors;
933 	def->aliases    = aliases;
934 
935 	/* Subparsers need this */
936 	def->useCork = CORK_QUEUE;
937 
938 	return def;
939 }
940