xref: /Universal-ctags/main/read.c (revision 9128cdba03f06a84dcbe056af23316712f0db1b7)
1 /*
2 *   Copyright (c) 1996-2002, Darren Hiebert
3 *
4 *   This source code is released for free distribution under the terms of the
5 *   GNU General Public License version 2 or (at your option) any later version.
6 *
7 *   This module contains low level input and tag file read functions (newline
8 *   conversion for input files are performed at this level).
9 */
10 
11 /*
12 *   INCLUDE FILES
13 */
14 #include "general.h"  /* must always come first */
15 
16 #include <string.h>
17 #include <ctype.h>
18 #include <stdlib.h>
19 
20 #define FILE_WRITE
21 #include "read.h"
22 #include "read_p.h"
23 #include "debug.h"
24 #include "entry_p.h"
25 #include "routines.h"
26 #include "routines_p.h"
27 #include "options_p.h"
28 #include "parse_p.h"
29 #include "promise_p.h"
30 #include "stats_p.h"
31 #include "trace.h"
32 #include "trashbox.h"
33 #ifdef HAVE_ICONV
34 # include "mbcs.h"
35 # include "mbcs_p.h"
36 #endif
37 
38 /*
39 *   DATA DECLARATIONS
40 */
41 
42 typedef struct sLangStack {
43 	langType *languages;
44 	unsigned int count;
45 	unsigned int size;
46 } langStack;
47 
48 /*  Maintains the state of the current input file.
49  */
50 typedef union sInputLangInfo {
51 	langStack stack;
52 	langType  type;
53 } inputLangInfo;
54 
55 typedef struct sInputFileInfo {
56 	vString *name;           /* name to report for input file */
57 	vString *tagPath;        /* path of input file relative to tag file */
58 	unsigned long lineNumber;/* line number in the input file */
59 	unsigned long lineNumberOrigin; /* The value set to `lineNumber'
60 					   when `resetInputFile' is called
61 					   on the input stream.
62 					   This is needed for nested stream. */
63 	bool isHeader;           /* is input file a header file? */
64 } inputFileInfo;
65 
66 typedef struct sComputPos {
67 	MIOPos  pos;
68 	long    offset;
69 	bool open;
70 	int crAdjustment;
71 } compoundPos;
72 
73 typedef struct sInputLineFposMap {
74 	compoundPos *pos;
75 	unsigned int count;
76 	unsigned int size;
77 } inputLineFposMap;
78 
/* Start and end of a nested input stream, each given as a line number and
 * a character offset.  NOTE(review): presumably the coordinates refer to the
 * enclosing stream -- confirm against pushNarrowedInputStream (). */
typedef struct sNestedInputStreamInfo {
	unsigned long startLine;
	long startCharOffset;
	unsigned long endLine;
	long endCharOffset;
} nestedInputStreamInfo;
85 
86 typedef struct sInputFile {
87 	vString    *path;          /* path of input file (if any) */
88 	vString    *line;          /* last line read from file */
89 	const unsigned char* currentLine;  /* current line being worked on */
90 	MIO        *mio;           /* MIO stream used for reading the file */
91 	compoundPos    filePosition;  /* file position of current line */
92 	unsigned int ungetchIdx;
93 	int         ungetchBuf[8]; /* characters that were ungotten */
94 
95 	bool bomFound;
96 	/*  Contains data pertaining to the original `source' file in which the tag
97 	 *  was defined. This may be different from the `input' file when #line
98 	 *  directives are processed (i.e. the input file is preprocessor output).
99 	 */
100 	inputFileInfo input; /* name, lineNumber */
101 	inputFileInfo source;
102 
103 	nestedInputStreamInfo nestedInputStreamInfo;
104 
105 	/* sourceTagPathHolder is a kind of trash box.
106 	   The buffer pointed by tagPath field of source field can
107 	   be referred from tagsEntryInfo instances. sourceTagPathHolder
108 	   is used keeping the buffer till all processing about the current
109 	   input file is done. After all processing is done, the buffers
110 	   in sourceTagPathHolder are destroyed. */
111 	stringList  * sourceTagPathHolder;
112 	inputLineFposMap lineFposMap;
113 	vString *allLines;
114 	int thinDepth;
115 	time_t mtime;
116 } inputFile;
117 
118 static inputLangInfo inputLang;
119 static langType sourceLang;
120 
121 /*
122 *   FUNCTION DECLARATIONS
123 */
124 static void     langStackInit (langStack *langStack);
125 static langType langStackTop  (langStack *langStack);
126 static langType langStackBotom(langStack *langStack);
127 static void     langStackPush (langStack *langStack, langType type);
128 static langType langStackPop  (langStack *langStack);
129 static void     langStackClear(langStack *langStack);
130 
131 
132 /*
133 *   DATA DEFINITIONS
134 */
135 static inputFile File;  /* static read through functions */
136 static inputFile BackupFile;	/* File is copied here when a nested parser is pushed */
137 static compoundPos StartOfLine;  /* holds deferred position of start of line */
138 
139 /*
140 *   FUNCTION DEFINITIONS
141 */
142 
getInputLineNumber(void)143 extern unsigned long getInputLineNumber (void)
144 {
145 	return File.input.lineNumber;
146 }
147 
getInputLineOffset(void)148 extern int getInputLineOffset (void)
149 {
150 	unsigned char *base = (unsigned char *) vStringValue (File.line);
151 	int ret;
152 
153 	if (File.currentLine)
154 		ret = File.currentLine - base - File.ungetchIdx;
155 	else if (File.input.lineNumber)
156 	{
157 		/* When EOF is saw, currentLine is set to NULL.
158 		 * So the way to calculate the offset at the end of file is tricky.
159 		 */
160 		ret = (mio_tell (File.mio) - (File.bomFound? 3: 0))
161 			- getInputFileOffsetForLine(File.input.lineNumber);
162 	}
163 	else
164 	{
165 		/* At the first line of file. */
166 		ret = mio_tell (File.mio) - (File.bomFound? 3: 0);
167 	}
168 
169 	return ret >= 0 ? ret : 0;
170 }
171 
getInputFileName(void)172 extern const char *getInputFileName (void)
173 {
174 	if (!File.input.name)
175 		return NULL;
176 	return vStringValue (File.input.name);
177 }
178 
getInputFilePosition(void)179 extern MIOPos getInputFilePosition (void)
180 {
181 	return File.filePosition.pos;
182 }
183 
getInputFileCompoundPosForLine(unsigned int line)184 static compoundPos* getInputFileCompoundPosForLine (unsigned int line)
185 {
186 	int index;
187 	if (line > 0)
188 	{
189 		if (File.lineFposMap.count > (line - 1))
190 			index = line - 1;
191 		else if (File.lineFposMap.count != 0)
192 			index = File.lineFposMap.count - 1;
193 		else
194 			index = 0;
195 	}
196 	else
197 		index = 0;
198 
199 	return File.lineFposMap.pos + index;
200 }
201 
getInputFilePositionForLine(unsigned int line)202 extern MIOPos getInputFilePositionForLine (unsigned int line)
203 {
204 	compoundPos *cpos = getInputFileCompoundPosForLine (line);
205 	return cpos->pos;
206 }
207 
getInputFileOffsetForLine(unsigned int line)208 extern long getInputFileOffsetForLine (unsigned int line)
209 {
210 	compoundPos *cpos = getInputFileCompoundPosForLine (line);
211 	return cpos->offset - (File.bomFound? 3: 0);
212 }
213 
getInputLanguage(void)214 extern langType getInputLanguage (void)
215 {
216 	return langStackTop (&inputLang.stack);
217 }
218 
getInputLanguageName(void)219 extern const char *getInputLanguageName (void)
220 {
221 	return getLanguageName (getInputLanguage());
222 }
223 
getInputFileTagPath(void)224 extern const char *getInputFileTagPath (void)
225 {
226 	return vStringValue (File.input.tagPath);
227 }
228 
/* Tells whether `lang' is the language reported by getInputLanguage (). */
extern bool isInputLanguage (langType lang)
{
	const langType current = getInputLanguage ();

	return (bool) (current == lang);
}
233 
isInputHeaderFile(void)234 extern bool isInputHeaderFile (void)
235 {
236 	return File.input.isHeader;
237 }
238 
isInputLanguageKindEnabled(int kindIndex)239 extern bool isInputLanguageKindEnabled (int kindIndex)
240 {
241 	return isLanguageKindEnabled (getInputLanguage (), kindIndex);
242 }
243 
isInputLanguageRoleEnabled(int kindIndex,int roleIndex)244 extern bool isInputLanguageRoleEnabled (int kindIndex, int roleIndex)
245 {
246 	return isLanguageRoleEnabled (getInputLanguage (),
247 								  kindIndex, roleIndex);
248 }
249 
countInputLanguageKinds(void)250 extern unsigned int countInputLanguageKinds (void)
251 {
252 	return countLanguageKinds (getInputLanguage ());
253 }
254 
countInputLanguageRoles(int kindIndex)255 extern unsigned int countInputLanguageRoles (int kindIndex)
256 {
257 	return countLanguageRoles (getInputLanguage (), kindIndex);
258 }
259 
doesInputLanguageAllowNullTag(void)260 extern bool doesInputLanguageAllowNullTag (void)
261 {
262 	return doesLanguageAllowNullTag (getInputLanguage ());
263 }
264 
doesInputLanguageRequestAutomaticFQTag(const tagEntryInfo * e)265 extern bool doesInputLanguageRequestAutomaticFQTag (const tagEntryInfo *e)
266 {
267 	return doesLanguageRequestAutomaticFQTag (e->langType);
268 }
269 
getSourceFileTagPath(void)270 extern const char *getSourceFileTagPath (void)
271 {
272 	return vStringValue (File.source.tagPath);
273 }
274 
getSourceLanguage(void)275 extern langType getSourceLanguage (void)
276 {
277 	return sourceLang;
278 }
279 
getSourceLineNumber(void)280 extern unsigned long getSourceLineNumber (void)
281 {
282 	return File.source.lineNumber;
283 }
284 
/* Deletes the name and tag-path strings of `finfo', if present, and resets
 * both members to NULL. */
static void freeInputFileInfo (inputFileInfo *finfo)
{
	vString **const owned[] = { &finfo->name, &finfo->tagPath };
	unsigned int i;

	for (i = 0; i < 2; i++)
	{
		if (*owned[i] == NULL)
			continue;
		vStringDelete (*owned[i]);
		*owned[i] = NULL;
	}
}
298 
freeInputFileResources(void)299 extern void freeInputFileResources (void)
300 {
301 	if (File.path != NULL)
302 		vStringDelete (File.path);
303 	if (File.line != NULL)
304 		vStringDelete (File.line);
305 	freeInputFileInfo (&File.input);
306 	freeInputFileInfo (&File.source);
307 }
308 
getInputFileData(size_t * size)309 extern const unsigned char *getInputFileData (size_t *size)
310 {
311 	return mio_memory_get_data (File.mio, size);
312 }
313 
314 /*
315  * inputLineFposMap related functions
316  */
/* Releases the entries of `lineFposMap' and resets it to the empty state.
 * Does nothing when no entries are allocated. */
static void freeLineFposMap (inputLineFposMap *lineFposMap)
{
	if (lineFposMap->pos == NULL)
		return;

	eFree (lineFposMap->pos);
	lineFposMap->pos = NULL;
	lineFposMap->size = 0;
	lineFposMap->count = 0;
}
327 
/* Gives `lineFposMap' a fresh, empty table with room for
 * INITIAL_lineFposMap_LEN entries.  A table already attached to the map is
 * not released here. */
static void allocLineFposMap (inputLineFposMap *lineFposMap)
{
#define INITIAL_lineFposMap_LEN 256
	lineFposMap->count = 0;
	lineFposMap->size = INITIAL_lineFposMap_LEN;
	lineFposMap->pos = xCalloc (INITIAL_lineFposMap_LEN, compoundPos);
}
335 
/* Appends a copy of `pos' to `lineFposMap', doubling the table when it is
 * full.  The new entry becomes the only one marked `open', and its
 * crAdjustment is the previous entry's total plus one when `crAdjustment'
 * is true.
 */
static void appendLineFposMap (inputLineFposMap *lineFposMap, compoundPos *pos,
							   bool crAdjustment)
{
	compoundPos *slot;
	int carried = 0;

	if (lineFposMap->count == lineFposMap->size)
	{
		lineFposMap->size *= 2;
		lineFposMap->pos = xRealloc (lineFposMap->pos,
					     lineFposMap->size,
					     compoundPos);
	}

	if (lineFposMap->count != 0)
	{
		compoundPos *const previous = lineFposMap->pos + (lineFposMap->count - 1);

		previous->open = false;
		carried = previous->crAdjustment;
	}

	slot = lineFposMap->pos + lineFposMap->count;
	*slot = *pos;
	slot->open = true;
	slot->crAdjustment = carried + ((crAdjustment)? 1: 0);
	lineFposMap->count++;
}
361 
compoundPosForOffset(const void * oft,const void * p)362 static int compoundPosForOffset (const void* oft, const void *p)
363 {
364 	long offset = *(long *)oft;
365 	const compoundPos *pos = p;
366 	const compoundPos *next = (compoundPos *)(((char *)pos) + sizeof (compoundPos));
367 
368 	if (offset < (pos->offset - pos->crAdjustment))
369 		return -1;
370 	else if (((pos->offset - pos->crAdjustment) <= offset)
371 		 && (pos->open
372 		     || (offset < (next->offset - next->crAdjustment))))
373 		return 0;
374 	else
375 		return 1;
376 }
377 
getInputLineNumberForFileOffset(long offset)378 extern unsigned long getInputLineNumberForFileOffset(long offset)
379 {
380 	compoundPos *p;
381 
382 	if (File.bomFound)
383 		offset += 3;
384 
385 	p = bsearch (&offset, File.lineFposMap.pos, File.lineFposMap.count, sizeof (compoundPos),
386 		     compoundPosForOffset);
387 	if (p == NULL)
388 		return 1;	/* TODO: 0? */
389 	else
390 		return 1 + (p - File.lineFposMap.pos);
391 }
392 
393 /*
394  *   Input file access functions
395  */
396 
setOwnerDirectoryOfInputFile(const char * const fileName)397 static void setOwnerDirectoryOfInputFile (const char *const fileName)
398 {
399 	const char *const head = fileName;
400 	const char *const tail = baseFilename (head);
401 
402 	if (File.path != NULL)
403 		vStringDelete (File.path);
404 	if (tail == head)
405 		File.path = NULL;
406 	else
407 	{
408 		const size_t length = tail - head - 1;
409 		File.path = vStringNew ();
410 		vStringNCopyS (File.path, fileName, length);
411 	}
412 }
413 
/* Installs `fileName' as the name of `finfo' (taking ownership of it) and
 * recomputes the tag path and the header flag.
 *
 * A previous tag path is handed to `holder' when one is given and deleted
 * otherwise.  The new tag path depends on Option.tagRelative:
 *   TREL_ALWAYS -> path relative to the tag file directory
 *   TREL_NEVER  -> absolute path
 *   TREL_NO, or `fileName' already absolute -> a copy of `fileName'
 *   otherwise   -> path relative to the tag file directory
 * `language' is not used by this function.
 */
static void setInputFileParametersCommon (inputFileInfo *finfo, vString *const fileName,
					  const langType language,
					  stringList *holder)
{
	vString *const oldTagPath = finfo->tagPath;

	if (finfo->name != NULL)
		vStringDelete (finfo->name);
	finfo->name = fileName;

	if (oldTagPath != NULL)
	{
		if (holder)
			stringListAdd (holder, oldTagPath);
		else
			vStringDelete (oldTagPath);
	}

	if (Option.tagRelative == TREL_NEVER
	    && Option.tagRelative != TREL_ALWAYS)
	{
		finfo->tagPath =
			vStringNewOwn (absoluteFilename (vStringValue (fileName)));
	}
	else if (Option.tagRelative != TREL_ALWAYS
		 && (Option.tagRelative == TREL_NO
		     || isAbsolutePath (vStringValue (fileName))))
	{
		finfo->tagPath = vStringNewCopy (fileName);
	}
	else
	{
		finfo->tagPath =
			vStringNewOwn (relativeFilename (vStringValue (fileName),
							 getTagFileDirectory ()));
	}

	finfo->isHeader = isIncludeFile (vStringValue (fileName));
}
448 
/* Empties the language stack of `langInfo' and pushes `lang' as its only
 * entry.  The stack is expected to be non-empty on entry. */
static void resetLangOnStack (inputLangInfo *langInfo, langType lang)
{
	langStack *const stack = &langInfo->stack;

	Assert (stack->count > 0);
	langStackClear (stack);
	langStackPush (stack, lang);
}
455 
baseLangOnStack(inputLangInfo * langInfo)456 extern langType baseLangOnStack (inputLangInfo *langInfo)
457 {
458 	Assert (langInfo->stack.count > 0);
459 	return langStackBotom (& (langInfo->stack));
460 }
461 
/* Pushes `lang' onto the language stack of `langInfo'. */
static void pushLangOnStack (inputLangInfo *langInfo, langType lang)
{
	langStack *const stack = &langInfo->stack;

	langStackPush (stack, lang);
}
466 
popLangOnStack(inputLangInfo * langInfo)467 static langType popLangOnStack (inputLangInfo *langInfo)
468 {
469 	return langStackPop (& langInfo->stack);
470 }
471 
/* Clears the language stack of `langInfo'. */
static void clearLangOnStack (inputLangInfo *langInfo)
{
	langStack *const stack = &langInfo->stack;

	langStackClear (stack);
}
476 
/* Records `fileName' for the input file (no tag-path holder is used) and
 * pushes `language' onto the input language stack. */
static void setInputFileParameters (vString *const fileName, const langType language)
{
	inputFileInfo *const target = &File.input;

	setInputFileParametersCommon (target, fileName, language, NULL);
	pushLangOnStack(&inputLang, language);
}
483 
/* Records `fileName' for the source file, parking a replaced tag path in
 * File.sourceTagPathHolder, and makes `language' the source language. */
static void setSourceFileParameters (vString *const fileName, const langType language)
{
	inputFileInfo *const target = &File.source;

	setInputFileParametersCommon (target, fileName,
				      language, File.sourceTagPathHolder);
	sourceLang = language;
}
490 
/* Makes `fileName' the current source file when a language can be
 * determined for it.  A relative name is combined with the directory of the
 * input file (File.path) when one is known.  `fileName' itself is only
 * read; a new string is passed on to setSourceFileParameters ().
 *
 * Returns true when the source file was changed, false when the language
 * lookup yielded LANG_IGNORE.
 */
static bool setSourceFileName (vString *const fileName)
{
	const langType language = getLanguageForFilenameAndContents (vStringValue (fileName));
	vString *resolved;

	if (language == LANG_IGNORE)
		return false;

	if (isAbsolutePath (vStringValue (fileName)) || File.path == NULL)
		resolved = vStringNewCopy (fileName);
	else
		resolved = vStringNewOwn (combinePathAndFile (
			vStringValue (File.path), vStringValue (fileName)));

	setSourceFileParameters (resolved, language);
	return true;
}
511 
512 /*
513  *   Line directive parsing
514  */
515 
/* Advances *str past any leading spaces and tabs. */
static void skipWhite (char **str)
{
	while (**str == ' '  ||  **str == '\t')
		(*str)++;
}

/* Parses the decimal line number of a line directive.
 *
 * Leading blanks are skipped, then a run of digits is read.  On return *str
 * points at the first character after the digits.  The result is the parsed
 * number, or 0 when the digits are not followed by a space or a tab, or when
 * the number does not fit in an unsigned long (0 is what the caller treats
 * as "not a line directive").
 */
static unsigned long readLineNumber (char **str)
{
	const unsigned long maxValue = ~0UL;
	unsigned long lNum = 0;
	bool overflow = false;
	char *s;

	skipWhite (str);
	/* isdigit () is only defined for unsigned char values and EOF, so a
	 * plain char must be converted first; it also rejects '\0'. */
	for (s = *str; isdigit ((unsigned char) *s); s++)
	{
		const unsigned long digit = (unsigned long) (*s - '0');

		if (lNum > (maxValue - digit) / 10)
			overflow = true;  /* keep consuming the remaining digits */
		else
			lNum = (lNum * 10) + digit;
	}
	if (overflow || (*s != ' ' && *s != '\t'))
		lNum = 0;
	*str = s;

	return lNum;
}
540 
541 /* While ANSI only permits lines of the form:
542  *   # line n "filename"
543  * Earlier compilers generated lines of the form
544  *   # n filename
545  * GNU C will output lines of the form:
546  *   # n "filename"
547  * So we need to be fairly flexible in what we accept.
548  */
readFileName(char * s)549 static vString *readFileName (char *s)
550 {
551 	vString *const fileName = vStringNew ();
552 	bool quoteDelimited = false;
553 
554 	skipWhite (&s);
555 	if (*s == '"')
556 	{
557 		s++;  /* skip double-quote */
558 		quoteDelimited = true;
559 	}
560 	while (*s != '\0'  &&  *s != '\n'  &&
561 			(quoteDelimited ? (*s != '"') : (*s != ' '  &&  *s != '\t')))
562 	{
563 		vStringPut (fileName, *s);
564 		s++;
565 	}
566 	vStringPut (fileName, '\0');
567 
568 	return fileName;
569 }
570 
parseLineDirective(char * s)571 static bool parseLineDirective (char *s)
572 {
573 	bool result = false;
574 
575 	skipWhite (&s);
576 	DebugStatement ( const char* lineStr = ""; )
577 
578 	if (isdigit (*s))
579 		result = true;
580 	else if (strncmp (s, "line", 4) == 0)
581 	{
582 		s += 4;
583 		if (*s == ' '  ||  *s == '\t')
584 		{
585 			DebugStatement ( lineStr = "line"; )
586 			result = true;
587 		}
588 	}
589 	if (result)
590 	{
591 		const unsigned long lNum = readLineNumber (&s);
592 		if (lNum == 0)
593 			result = false;
594 		else
595 		{
596 			vString *const fileName = readFileName (s);
597 			if (vStringLength (fileName) == 0)
598 			{
599 				File.source.lineNumber = lNum - 1;  /* applies to NEXT line */
600 				DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld", lineStr, lNum); )
601 			}
602 			else if (setSourceFileName (fileName))
603 			{
604 				File.source.lineNumber = lNum - 1;  /* applies to NEXT line */
605 				DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld \"%s\"",
606 								lineStr, lNum, vStringValue (fileName)); )
607 			}
608 
609 			if (vStringLength (fileName) > 0 &&
610 				lNum == 1)
611 				makeFileTag (vStringValue (fileName));
612 			vStringDelete (fileName);
613 			result = true;
614 		}
615 	}
616 	return result;
617 }
618 
619 /*
620  *   Input file I/O operations
621  */
/* Largest file size, in bytes, that getMioFull () loads into memory when a
 * memory stream is not strictly required.  With DEBUG defined the limit is
 * 0, so only parsers that require a memory stream get one. */
#ifndef DEBUG
#define MAX_IN_MEMORY_FILE_SIZE (1024*1024)
#else
#define MAX_IN_MEMORY_FILE_SIZE 0
#endif
627 
getMioFull(const char * const fileName,const char * const openMode,bool memStreamRequired,time_t * mtime)628 static MIO *getMioFull (const char *const fileName, const char *const openMode,
629 		    bool memStreamRequired, time_t *mtime)
630 {
631 	FILE *src;
632 	fileStatus *st;
633 	unsigned long size;
634 	unsigned char *data;
635 
636 	st = eStat (fileName);
637 	size = st->size;
638 	if (mtime)
639 		*mtime = st->mtime;
640 	eStatFree (st);
641 	if ((!memStreamRequired)
642 	    && (size > MAX_IN_MEMORY_FILE_SIZE || size == 0))
643 		return mio_new_file (fileName, openMode);
644 
645 	src = fopen (fileName, openMode);
646 	if (!src)
647 		return NULL;
648 
649 	data = eMalloc (size);
650 	if (fread (data, 1, size, src) != size)
651 	{
652 		eFree (data);
653 		fclose (src);
654 		if (memStreamRequired)
655 			return NULL;
656 		else
657 			return mio_new_file (fileName, openMode);
658 	}
659 	fclose (src);
660 	return mio_new_memory (data, size, eRealloc, eFreeNoNullCheck);
661 }
662 
getMio(const char * const fileName,const char * const openMode,bool memStreamRequired)663 extern MIO *getMio (const char *const fileName, const char *const openMode,
664 		    bool memStreamRequired)
665 {
666 	return getMioFull (fileName, openMode, memStreamRequired, NULL);
667 }
668 
669 /* Return true if utf8 BOM is found */
checkUTF8BOM(MIO * mio,bool skipIfFound)670 static bool checkUTF8BOM (MIO *mio, bool skipIfFound)
671 {
672 	bool r = false;
673 	if ((0xEF == mio_getc (mio))
674 		&& (0xBB == mio_getc (mio))
675 		&& (0xBF == mio_getc (mio)))
676 		r = true;
677 
678 	if (! (r && skipIfFound))
679 		mio_rewind (mio);
680 	return r;
681 }
682 
/* Rewinds the stream of `f' to the first byte of content: the very start
 * of the stream, or just after the UTF-8 BOM when one was found when the
 * file was opened. */
static void rewindInputFile (inputFile *f)
{
	int c CTAGS_ATTR_UNUSED;

	mio_rewind (f->mio);
	if (!f->bomFound)
		return;

	/* Consume the three BOM bytes again. */
	c = mio_getc (f->mio);
	Assert (c == 0xEF);
	c = mio_getc (f->mio);
	Assert (c == 0xBB);
	c = mio_getc (f->mio);
	Assert (c == 0xBF);
}
698 
699 /*  This function opens an input file, and resets the line counter.  If it
700  *  fails, it will display an error message and leave the File.mio set to NULL.
701  */
openInputFile(const char * const fileName,const langType language,MIO * mio,time_t mtime)702 extern bool openInputFile (const char *const fileName, const langType language,
703 			      MIO *mio, time_t mtime)
704 {
705 	const char *const openMode = "rb";
706 	bool opened = false;
707 	bool memStreamRequired;
708 
709 	/*	If another file was already open, then close it.
710 	 */
711 	if (File.mio != NULL)
712 	{
713 		mio_unref (File.mio);  /* close any open input file */
714 		File.mio = NULL;
715 	}
716 
717 	/* File position is used as key for checking the availability of
718 	   pattern cache in entry.h. If an input file is changed, the
719 	   key is meaningless. So notifying the changing here. */
720 	invalidatePatternCache();
721 
722 	if (File.sourceTagPathHolder == NULL)
723 	{
724 		File.sourceTagPathHolder = stringListNew ();
725 		DEFAULT_TRASH_BOX(File.sourceTagPathHolder, stringListDelete);
726 	}
727 	stringListClear (File.sourceTagPathHolder);
728 
729 	memStreamRequired = doesParserRequireMemoryStream (language);
730 
731 	if (mio)
732 	{
733 		size_t tmp;
734 		if (memStreamRequired && (!mio_memory_get_data (mio, &tmp)))
735 			mio = NULL;
736 		else
737 			mio_rewind (mio);
738 	}
739 
740 	File.mio = mio? mio_ref (mio): getMioFull (fileName, openMode, memStreamRequired, &File.mtime);
741 
742 	if (File.mio == NULL)
743 		error (WARNING | PERROR, "cannot open \"%s\"", fileName);
744 	else
745 	{
746 		opened = true;
747 
748 		if (File.mio == mio)
749 			File.mtime = mtime;
750 
751 		File.bomFound = checkUTF8BOM (File.mio, true);
752 
753 		setOwnerDirectoryOfInputFile (fileName);
754 		mio_getpos (File.mio, &StartOfLine.pos);
755 		mio_getpos (File.mio, &File.filePosition.pos);
756 		File.filePosition.offset = StartOfLine.offset = mio_tell (File.mio);
757 		File.currentLine  = NULL;
758 
759 		File.line = vStringNewOrClear (File.line);
760 		File.ungetchIdx = 0;
761 
762 		setInputFileParameters  (vStringNewInit (fileName), language);
763 		File.input.lineNumberOrigin = 0L;
764 		File.input.lineNumber = File.input.lineNumberOrigin;
765 		setSourceFileParameters (vStringNewInit (fileName), language);
766 		File.source.lineNumberOrigin = 0L;
767 		File.source.lineNumber = File.source.lineNumberOrigin;
768 		allocLineFposMap (&File.lineFposMap);
769 
770 		File.thinDepth = 0;
771 		verbose ("OPENING%s %s as %s language %sfile [%s%s]\n",
772 				 (File.bomFound? "(skipping utf-8 bom)": ""),
773 				 fileName,
774 				 getLanguageName (language),
775 				 File.input.isHeader ? "include " : "",
776 				 mio? "reused": "new",
777 				 memStreamRequired? ",required": "");
778 	}
779 	return opened;
780 }
781 
getInputFileMtime(void)782 extern time_t getInputFileMtime (void)
783 {
784 	return File.mtime;
785 }
786 
resetInputFile(const langType language)787 extern void resetInputFile (const langType language)
788 {
789 	Assert (File.mio);
790 
791 	rewindInputFile  (&File);
792 	mio_getpos (File.mio, &StartOfLine.pos);
793 	mio_getpos (File.mio, &File.filePosition.pos);
794 	File.filePosition.offset = StartOfLine.offset = mio_tell (File.mio);
795 	File.currentLine  = NULL;
796 
797 	Assert (File.line);
798 	vStringClear (File.line);
799 	File.ungetchIdx = 0;
800 
801 	if (hasLanguageMultilineRegexPatterns (language))
802 		File.allLines = vStringNew ();
803 
804 	resetLangOnStack (& inputLang, language);
805 	File.input.lineNumber = File.input.lineNumberOrigin;
806 	sourceLang = language;
807 	File.source.lineNumber = File.source.lineNumberOrigin;
808 }
809 
closeInputFile(void)810 extern void closeInputFile (void)
811 {
812 	if (File.mio != NULL)
813 	{
814 		clearLangOnStack (& inputLang);
815 
816 		/*  The line count of the file is 1 too big, since it is one-based
817 		 *  and is incremented upon each newline.
818 		 */
819 		if (Option.printTotals)
820 		{
821 			fileStatus *status = eStat (vStringValue (File.input.name));
822 			addTotals (0, File.input.lineNumber - 1L, status->size);
823 		}
824 		mio_unref (File.mio);
825 		File.mio = NULL;
826 		freeLineFposMap (&File.lineFposMap);
827 	}
828 }
829 
getInputFileUserData(void)830 extern void *getInputFileUserData(void)
831 {
832 	return mio_get_user_data (File.mio);
833 }
834 
835 /*  Action to take for each encountered input newline.
836  */
fileNewline(bool crAdjustment)837 static void fileNewline (bool crAdjustment)
838 {
839 	File.filePosition = StartOfLine;
840 
841 	if (BackupFile.mio == NULL)
842 		appendLineFposMap (&File.lineFposMap, &File.filePosition,
843 						   crAdjustment);
844 
845 	File.input.lineNumber++;
846 	File.source.lineNumber++;
847 	DebugStatement ( if (Option.breakLine == File.input.lineNumber) lineBreak (); )
848 	DebugStatement ( debugPrintf (DEBUG_RAW, "%6ld: ", File.input.lineNumber); )
849 }
850 
ungetcToInputFile(int c)851 extern void ungetcToInputFile (int c)
852 {
853 	const size_t len = ARRAY_SIZE (File.ungetchBuf);
854 
855 	Assert (File.ungetchIdx < len);
856 	/* we cannot rely on the assertion that might be disabled in non-debug mode */
857 	if (File.ungetchIdx < len)
858 		File.ungetchBuf[File.ungetchIdx++] = c;
859 }
860 
/* How the line returned by readLine () ended. */
typedef enum eEolType {
	eol_eof   = 0,  /* end of file was reached while reading the line */
	eol_nl    = 1,  /* the line ended with LF */
	eol_cr_nl = 2,  /* the line ended with CR-LF (converted to LF) */
} eolType;
866 
readLine(vString * const vLine,MIO * const mio)867 static eolType readLine (vString *const vLine, MIO *const mio)
868 {
869 	char *str;
870 	size_t size;
871 	eolType r = eol_nl;
872 
873 	vStringClear (vLine);
874 
875 	str = vStringValue (vLine);
876 	size = vStringSize (vLine);
877 
878 	for (;;)
879 	{
880 		bool newLine;
881 		bool eof;
882 
883 		if (mio_gets (mio, str, size) == NULL)
884 		{
885 			if (!mio_eof (mio))
886 				error (FATAL | PERROR, "Failure on attempt to read file");
887 		}
888 		vStringSetLength (vLine);
889 		newLine = vStringLength (vLine) > 0 && vStringLast (vLine) == '\n';
890 		eof = mio_eof (mio);
891 		if (eof)
892 			r = eol_eof;
893 
894 		/* Turn line breaks into a canonical form. The three commonly
895 		 * used forms of line breaks are: LF (UNIX/Mac OS X), CR-LF (MS-DOS) and
896 		 * CR (Mac OS 9). As CR-only EOL isn't handled by gets() and Mac OS 9
897 		 * is dead, we only handle CR-LF EOLs and convert them into LF. */
898 		if (newLine && vStringLength (vLine) > 1 &&
899 			vStringChar (vLine, vStringLength (vLine) - 2) == '\r')
900 		{
901 			vStringChar (vLine, vStringLength (vLine) - 2) = '\n';
902 			vStringChop (vLine);
903 			r = eol_cr_nl;
904 		}
905 
906 		if (newLine || eof)
907 			break;
908 
909 		vStringResize (vLine, vStringLength (vLine) * 2);
910 		str = vStringValue (vLine) + vStringLength (vLine);
911 		size = vStringSize (vLine) - vStringLength (vLine);
912 	}
913 	return r;
914 }
915 
iFileGetLine(bool chop_newline)916 static vString *iFileGetLine (bool chop_newline)
917 {
918 	eolType eol;
919 	langType lang = getInputLanguage();
920 
921 	Assert (File.line);
922 	eol = readLine (File.line, File.mio);
923 
924 	if (vStringLength (File.line) > 0)
925 	{
926 		/* Use StartOfLine from previous iFileGetLine() call */
927 		fileNewline (eol == eol_cr_nl);
928 		/* Store StartOfLine for the next iFileGetLine() call */
929 		mio_getpos (File.mio, &StartOfLine.pos);
930 		StartOfLine.offset = mio_tell (File.mio);
931 
932 		if (Option.lineDirectives && vStringChar (File.line, 0) == '#')
933 			parseLineDirective (vStringValue (File.line) + 1);
934 
935 		if (File.allLines)
936 			vStringCat (File.allLines, File.line);
937 
938 		bool chopped = vStringStripNewline (File.line);
939 
940 		matchLanguageRegex (lang, File.line);
941 
942 		if (chopped && !chop_newline)
943 			vStringPutNewlinAgainUnsafe (File.line);
944 
945 		return File.line;
946 	}
947 	else
948 	{
949 		if (File.allLines)
950 		{
951 			matchLanguageMultilineRegex (lang, File.allLines);
952 			matchLanguageMultitableRegex (lang, File.allLines);
953 
954 			/* To limit the execution of multiline/multitable parser(s) only
955 			   ONCE, clear File.allLines field. */
956 			vStringDelete (File.allLines);
957 			File.allLines = NULL;
958 		}
959 		return NULL;
960 	}
961 }
962 
963 /*  Do not mix use of readLineFromInputFile () and getcFromInputFile () for the same file.
964  */
getcFromInputFile(void)965 extern int getcFromInputFile (void)
966 {
967 	int c;
968 
969 	/*  If there is an ungotten character, then return it.  Don't do any
970 	 *  other processing on it, though, because we already did that the
971 	 *  first time it was read through getcFromInputFile ().
972 	 */
973 	if (File.ungetchIdx > 0)
974 	{
975 		c = File.ungetchBuf[--File.ungetchIdx];
976 		return c;  /* return here to avoid re-calling debugPutc () */
977 	}
978 	do
979 	{
980 		if (File.currentLine != NULL)
981 		{
982 			c = *File.currentLine++;
983 			if (c == '\0')
984 				File.currentLine = NULL;
985 		}
986 		else
987 		{
988 			vString* const line = iFileGetLine (false);
989 			if (line != NULL)
990 				File.currentLine = (unsigned char*) vStringValue (line);
991 			if (File.currentLine == NULL)
992 				c = EOF;
993 			else
994 				c = '\0';
995 		}
996 	} while (c == '\0');
997 	DebugStatement ( debugPutc (DEBUG_READ, c); )
998 	return c;
999 }
1000 
1001 /* returns the nth previous character (0 meaning current), or def if nth cannot
1002  * be accessed.  Note that this can't access previous line data. */
getNthPrevCFromInputFile(unsigned int nth,int def)1003 extern int getNthPrevCFromInputFile (unsigned int nth, int def)
1004 {
1005 	const unsigned char *base = (unsigned char *) vStringValue (File.line);
1006 	const unsigned int offset = File.ungetchIdx + 1 + nth;
1007 
1008 	if (File.currentLine != NULL && File.currentLine >= base + offset)
1009 		return (int) *(File.currentLine - offset);
1010 	else
1011 		return def;
1012 }
1013 
/* Reads characters until `c' or EOF has been read; returns the character
 * that stopped the scan. */
extern int skipToCharacterInInputFile (int c)
{
	for (;;)
	{
		const int d = getcFromInputFile ();

		if (d == EOF || d == c)
			return d;
	}
}
1023 
/* Skips input until the two-character sequence `c0' `c1' has been read.
 * Each round skips to `c0' and then reads on while further `c0' characters
 * (that are not `c1') follow.  Returns `c1' on success or EOF when the
 * input ends first. */
extern int skipToCharacterInInputFile2 (int c0, int c1)
{
	int d;

	for (;;)
	{
		skipToCharacterInInputFile(c0);
		do
		{
			d = getcFromInputFile ();
		} while (d == c0 && d != c1);

		if (d == EOF || d == c1)
			break;
	}
	return d;
}
1036 
1037 /*  An alternative interface to getcFromInputFile (). Do not mix use of readLineFromInputFile()
1038  *  and getcFromInputFile() for the same file. The returned string does not contain
1039  *  the terminating newline. A NULL return value means that all lines in the
1040  *  file have been read and we are at the end of file.
1041  */
readLineFromInputFile(void)1042 extern const unsigned char *readLineFromInputFile (void)
1043 {
1044 	vString* const line = iFileGetLine (true);
1045 	const unsigned char* result = NULL;
1046 	if (line != NULL)
1047 	{
1048 		result = (const unsigned char*) vStringValue (line);
1049 		DebugStatement ( debugPrintf (DEBUG_READ, "%s\n", result); )
1050 	}
1051 	return result;
1052 }
1053 
1054 /*
1055  *   Raw file line reading with automatic buffer sizing
1056  */
readLineRaw(vString * const vLine,MIO * const mio)1057 extern char *readLineRaw (vString *const vLine, MIO *const mio)
1058 {
1059 	if (mio == NULL)  /* to free memory allocated to buffer */
1060 		error (FATAL, "NULL file pointer");
1061 	else
1062 	{
1063 		readLine (vLine, mio);
1064 
1065 #ifdef HAVE_ICONV
1066 		if (isConverting ())
1067 			convertString (vLine);
1068 #endif
1069 	}
1070 	return vStringLength (vLine) > 0 ? vStringValue (vLine) : NULL;
1071 }
1072 
1073 /*  Places into the line buffer the contents of the line referenced by
1074  *  "location".
1075  */
readLineFromBypass(vString * const vLine,MIOPos location,long * const pSeekValue)1076 extern char *readLineFromBypass (
1077 		vString *const vLine, MIOPos location, long *const pSeekValue)
1078 {
1079 	MIOPos orignalPosition;
1080 	char *result;
1081 
1082 	mio_getpos (File.mio, &orignalPosition);
1083 	mio_setpos (File.mio, &location);
1084 	mio_clearerr (File.mio);
1085 	if (pSeekValue != NULL)
1086 		*pSeekValue = mio_tell (File.mio);
1087 	result = readLineRaw (vLine, File.mio);
1088 	mio_setpos (File.mio, &orignalPosition);
1089 	/* If the file is empty, we can't get the line
1090 	   for location 0. readLineFromBypass doesn't know
1091 	   what itself should do; just report it to the caller. */
1092 	return result;
1093 }
1094 
pushNarrowedInputStream(bool useMemoryStreamInput,unsigned long startLine,long startCharOffset,unsigned long endLine,long endCharOffset,unsigned long sourceLineOffset,int promise)1095 extern void   pushNarrowedInputStream (
1096 				       bool useMemoryStreamInput,
1097 				       unsigned long startLine, long startCharOffset,
1098 				       unsigned long endLine, long endCharOffset,
1099 				       unsigned long sourceLineOffset,
1100 				       int promise)
1101 {
1102 	long p, q;
1103 	MIOPos original;
1104 	MIOPos tmp;
1105 	MIO *subio;
1106 
1107 	if (isThinStreamSpec (startLine, startCharOffset,
1108 						  endLine, endCharOffset,
1109 						  sourceLineOffset))
1110 	{
1111 		if ((!useMemoryStreamInput
1112 			 || mio_memory_get_data (File.mio, NULL)))
1113 		{
1114 			File.thinDepth++;
1115 			verbose ("push thin stream (%d)\n", File.thinDepth);
1116 			return;
1117 		}
1118 		error(WARNING, "INTERNAL ERROR: though pushing thin MEMORY stream, "
1119 			  "underlying input stream is a FILE stream: %s@%s",
1120 			  vStringValue (File.input.name), vStringValue (File.input.tagPath));
1121 		AssertNotReached ();
1122 	}
1123 	Assert (File.thinDepth == 0);
1124 
1125 	original = getInputFilePosition ();
1126 
1127 	tmp = getInputFilePositionForLine (startLine);
1128 	mio_setpos (File.mio, &tmp);
1129 	mio_seek (File.mio, startCharOffset, SEEK_CUR);
1130 	p = mio_tell (File.mio);
1131 
1132 	tmp = getInputFilePositionForLine (endLine);
1133 	mio_setpos (File.mio, &tmp);
1134 	mio_seek (File.mio, endCharOffset, SEEK_CUR);
1135 	q = mio_tell (File.mio);
1136 
1137 	mio_setpos (File.mio, &original);
1138 
1139 	invalidatePatternCache();
1140 
1141 	size_t size = q - p;
1142 	subio = mio_new_mio (File.mio, p, size);
1143 	if (subio == NULL)
1144 		error (FATAL, "memory for mio may be exhausted");
1145 
1146 	runModifiers (promise,
1147 				  startLine, startCharOffset,
1148 				  endLine, endCharOffset,
1149 				  mio_memory_get_data (subio, NULL),
1150 				  size);
1151 
1152 	BackupFile = File;
1153 
1154 	File.mio = subio;
1155 	File.bomFound = false;
1156 	File.nestedInputStreamInfo.startLine = startLine;
1157 	File.nestedInputStreamInfo.startCharOffset = startCharOffset;
1158 	File.nestedInputStreamInfo.endLine = endLine;
1159 	File.nestedInputStreamInfo.endCharOffset = endCharOffset;
1160 
1161 	File.input.lineNumberOrigin = ((startLine == 0)? 0: startLine - 1);
1162 	File.source.lineNumberOrigin = ((sourceLineOffset == 0)? 0: sourceLineOffset - 1);
1163 }
1164 
doesParserRunAsGuest(void)1165 extern bool doesParserRunAsGuest (void)
1166 {
1167 	return !(File.nestedInputStreamInfo.startLine == 0
1168 			 && File.nestedInputStreamInfo.startCharOffset == 0
1169 			 && File.nestedInputStreamInfo.endLine == 0
1170 			 && File.nestedInputStreamInfo.endCharOffset == 0);
1171 }
1172 
getNestedInputBoundaryInfo(unsigned long lineNumber)1173 extern unsigned int getNestedInputBoundaryInfo (unsigned long lineNumber)
1174 {
1175 	unsigned int info;
1176 
1177 	if (!doesParserRunAsGuest())
1178 		/* Not in a nested input stream  */
1179 		return 0;
1180 
1181 	info = 0;
1182 	if (File.nestedInputStreamInfo.startLine == lineNumber
1183 	    && File.nestedInputStreamInfo.startCharOffset != 0)
1184 		info |= BOUNDARY_START;
1185 	if (File.nestedInputStreamInfo.endLine == lineNumber
1186 	    && File.nestedInputStreamInfo.endCharOffset != 0)
1187 		info |= BOUNDARY_END;
1188 
1189 	return info;
1190 }
popNarrowedInputStream(void)1191 extern void   popNarrowedInputStream  (void)
1192 {
1193 	if (File.thinDepth)
1194 	{
1195 		File.thinDepth--;
1196 		verbose ("CLEARING thin flag(%d)\n", File.thinDepth);
1197 		return;
1198 	}
1199 	mio_unref (File.mio);
1200 	File = BackupFile;
1201 	memset (&BackupFile, 0, sizeof (BackupFile));
1202 }
1203 
pushLanguage(const langType language)1204 extern void pushLanguage (const langType language)
1205 {
1206 	pushLangOnStack (& inputLang, language);
1207 }
1208 
popLanguage(void)1209 extern langType popLanguage (void)
1210 {
1211 	return popLangOnStack (& inputLang);
1212 }
1213 
getLanguageForBaseParser(void)1214 extern langType getLanguageForBaseParser (void)
1215 {
1216 	return baseLangOnStack (& inputLang);
1217 }
1218 
/*  Prepares an empty language stack with room for one element.
 *  The address of the element array pointer is registered with the default
 *  trash box (presumably so that the array is released at program cleanup
 *  even after it has been reallocated -- TODO confirm in trashbox.h).
 */
static void langStackInit (langStack *langStack)
{
	const unsigned int initialCapacity = 1;

	langStack->count = 0;
	langStack->size  = initialCapacity;
	langStack->languages = xCalloc (langStack->size, langType);
	DEFAULT_TRASH_BOX(&(langStack->languages), eFreeIndirect);
}
1226 
langStackTop(langStack * langStack)1227 static langType langStackTop (langStack *langStack)
1228 {
1229 	Assert (langStack->count > 0);
1230 	return langStack->languages [langStack->count - 1];
1231 }
1232 
langStackBotom(langStack * langStack)1233 static langType langStackBotom(langStack *langStack)
1234 {
1235 	Assert (langStack->count > 0);
1236 	return langStack->languages [0];
1237 }
1238 
/*  Empties the stack by popping elements until none is left.
 *  The element array itself is kept.
 */
static void     langStackClear (langStack *langStack)
{
	for (; langStack->count > 0; )
		langStackPop (langStack);
}
1244 
/*  Pushes `type' onto the stack.
 *  A stack whose size is still zero is initialized first; a full stack is
 *  enlarged by exactly one element before the new value is stored.
 */
static void     langStackPush (langStack *langStack, langType type)
{
	if (langStack->size == 0)
		langStackInit (langStack);
	else if (langStack->count == langStack->size)
	{
		langStack->size += 1;
		langStack->languages = xRealloc (langStack->languages,
						 langStack->size, langType);
	}

	langStack->languages [langStack->count] = type;
	langStack->count += 1;
}
1254 
langStackPop(langStack * langStack)1255 static langType langStackPop  (langStack *langStack)
1256 {
1257 	return langStack->languages [ -- langStack->count ];
1258 }
1259 
/*  Returns true if the given area specification denotes a "thin" stream,
 *  which is the case when every one of the five values is zero.
 */
extern bool isThinStreamSpec(unsigned long startLine, long startCharOffset,
							 unsigned long endLine, long endCharOffset,
							 unsigned long sourceLineOffset)
{
	/* Any non-zero line value rules out a thin stream. */
	if (startLine != 0 || endLine != 0 || sourceLineOffset != 0)
		return false;

	/* Likewise any non-zero character offset. */
	if (startCharOffset != 0 || endCharOffset != 0)
		return false;

	return true;
}
1270 
#ifdef DO_TRACING
/*  Tells whether tracing output is wanted at this moment.
 *
 *  While an input stream is open, a parser is given: the answer is whether
 *  the language of the current input is one of those specified in the
 *  --_trace=<LANG>,... option.  Without an input stream no parser is given,
 *  and the answer is just whether the --_trace option is given or not.
 */
extern bool isTraced (void)
{
	if (File.mio != NULL)
		return isLanguageTraced (getInputLanguage ());

	return isMainTraced ();
}
#endif	/* DO_TRACING */
1284