1 /*
2 * Copyright (c) 1996-2002, Darren Hiebert
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
*   This module contains low level input and tag file read functions (newline
*   conversion for input files is performed at this level).
9 */
10
11 /*
12 * INCLUDE FILES
13 */
14 #include "general.h" /* must always come first */
15
16 #include <string.h>
17 #include <ctype.h>
18 #include <stdlib.h>
19
20 #define FILE_WRITE
21 #include "read.h"
22 #include "read_p.h"
23 #include "debug.h"
24 #include "entry_p.h"
25 #include "routines.h"
26 #include "routines_p.h"
27 #include "options_p.h"
28 #include "parse_p.h"
29 #include "promise_p.h"
30 #include "stats_p.h"
31 #include "trace.h"
32 #include "trashbox.h"
33 #ifdef HAVE_ICONV
34 # include "mbcs.h"
35 # include "mbcs_p.h"
36 #endif
37
38 /*
39 * DATA DECLARATIONS
40 */
41
/* Array-backed stack of language identifiers. It is manipulated only
 * through the langStack* helpers declared further down in this file.
 */
typedef struct sLangStack {
	langType *languages;	/* storage holding the stacked languages */
	unsigned int count;	/* presumably the number of entries in use -- confirm in the langStack* helpers */
	unsigned int size;	/* presumably the allocated capacity of `languages' -- confirm */
} langStack;
47
/*  Language information for the current input.
 *  This is a union: either a stack of languages (`stack') or a single
 *  language (`type'). The functions visible in this file only use `stack'.
 */
typedef union sInputLangInfo {
	langStack stack;
	langType  type;
} inputLangInfo;
54
/* Naming and line bookkeeping for one file. An inputFile carries two of
 * these: one for the `input' file and one for the `source' file.
 */
typedef struct sInputFileInfo {
	vString *name;           /* name to report for input file */
	vString *tagPath;        /* path of input file relative to tag file */
	unsigned long lineNumber;/* line number in the input file */
	unsigned long lineNumberOrigin; /* The value assigned to `lineNumber'
	                                   when `resetInputFile' is called
	                                   on the input stream.
	                                   This is needed for nested streams. */
	bool isHeader;           /* is input file a header file? */
} inputFileInfo;
65
/* A stream position kept in two forms (opaque MIOPos and numeric offset),
 * together with the bookkeeping used by the line position map.
 */
typedef struct sComputPos {
	MIOPos pos;        /* position as filled in by mio_getpos () */
	long offset;       /* position as reported by mio_tell () */
	bool open;         /* true only for the entry most recently appended
	                      to a line map (see appendLineFposMap) */
	int crAdjustment;  /* running count, up to and including this entry,
	                      of appended lines flagged with a CR adjustment */
} compoundPos;
72
/* Growable array of line start positions. The entry at index N describes
 * line N + 1 (see getInputFileCompoundPosForLine).
 */
typedef struct sInputLineFposMap {
	compoundPos *pos;	/* the entries; allocated by allocLineFposMap */
	unsigned int count;	/* number of entries in use */
	unsigned int size;	/* number of entries allocated */
} inputLineFposMap;
78
/* Describes the area of the outer input that a nested (narrowed) input
 * stream covers. NOTE(review): field meanings below are inferred from the
 * parameter names of pushNarrowedInputStream -- confirm against its body.
 */
typedef struct sNestedInputStreamInfo {
	unsigned long startLine;	/* presumably the line where the area starts */
	long startCharOffset;		/* presumably the offset within startLine */
	unsigned long endLine;		/* presumably the line where the area ends */
	long endCharOffset;		/* presumably the offset within endLine */
} nestedInputStreamInfo;
85
/* Complete reading state for one input stream. The file-scope object
 * `File' is the instance that the functions in this file operate on.
 */
typedef struct sInputFile {
	vString    *path;          /* path of input file (if any) */
	vString    *line;          /* last line read from file */
	const unsigned char* currentLine;  /* current line being worked on */
	MIO        *mio;           /* MIO stream used for reading the file */
	compoundPos    filePosition;  /* file position of current line */
	unsigned int ungetchIdx;   /* number of characters held in ungetchBuf */
	int         ungetchBuf[8]; /* characters that were ungotten */

	bool bomFound;             /* true when a UTF-8 BOM was found at the
	                              beginning of the stream */
	/*  Contains data pertaining to the original `source' file in which the tag
	 *  was defined. This may be different from the `input' file when #line
	 *  directives are processed (i.e. the input file is preprocessor output).
	 */
	inputFileInfo input; /* name, lineNumber */
	inputFileInfo source;

	nestedInputStreamInfo nestedInputStreamInfo;

	/* sourceTagPathHolder is a kind of trash box.
	   The buffer pointed to by the tagPath field of the source field can
	   be referred to from tagEntryInfo instances. sourceTagPathHolder
	   is used for keeping the buffer until all processing of the current
	   input file is done. After all processing is done, the buffers
	   in sourceTagPathHolder are destroyed. */
	stringList  * sourceTagPathHolder;
	inputLineFposMap lineFposMap;  /* start position of each line read so far */
	vString *allLines;             /* concatenation of the lines read so far;
	                                  created by resetInputFile when the language
	                                  has multiline regex patterns, may be NULL */
	int thinDepth;                 /* incremented each time pushNarrowedInputStream
	                                  pushes a thin stream */
	time_t mtime;                  /* modification time recorded by openInputFile */
} inputFile;
117
static inputLangInfo inputLang;	/* languages of the current input; the top of its stack is what getInputLanguage () reports */
static langType sourceLang;	/* language of the source file; reported by getSourceLanguage () */
120
121 /*
122 * FUNCTION DECLARATIONS
123 */
/* Operations on langStack. Only the prototypes are needed at this point;
 * the definitions are expected to follow later in the file.
 */
static void     langStackInit (langStack *langStack);
static langType langStackTop  (langStack *langStack);
static langType langStackBotom(langStack *langStack);
static void     langStackPush (langStack *langStack, langType type);
static langType langStackPop  (langStack *langStack);
static void     langStackClear(langStack *langStack);
130
131
132 /*
133 * DATA DEFINITIONS
134 */
static inputFile File;  /* state of the input being read; shared by all read functions in this file */
static inputFile BackupFile;	/* File is copied here when a nested parser is pushed */
static compoundPos StartOfLine;	/* holds deferred position of start of line */
138
139 /*
140 * FUNCTION DEFINITIONS
141 */
142
getInputLineNumber(void)143 extern unsigned long getInputLineNumber (void)
144 {
145 return File.input.lineNumber;
146 }
147
getInputLineOffset(void)148 extern int getInputLineOffset (void)
149 {
150 unsigned char *base = (unsigned char *) vStringValue (File.line);
151 int ret;
152
153 if (File.currentLine)
154 ret = File.currentLine - base - File.ungetchIdx;
155 else if (File.input.lineNumber)
156 {
157 /* When EOF is saw, currentLine is set to NULL.
158 * So the way to calculate the offset at the end of file is tricky.
159 */
160 ret = (mio_tell (File.mio) - (File.bomFound? 3: 0))
161 - getInputFileOffsetForLine(File.input.lineNumber);
162 }
163 else
164 {
165 /* At the first line of file. */
166 ret = mio_tell (File.mio) - (File.bomFound? 3: 0);
167 }
168
169 return ret >= 0 ? ret : 0;
170 }
171
getInputFileName(void)172 extern const char *getInputFileName (void)
173 {
174 if (!File.input.name)
175 return NULL;
176 return vStringValue (File.input.name);
177 }
178
getInputFilePosition(void)179 extern MIOPos getInputFilePosition (void)
180 {
181 return File.filePosition.pos;
182 }
183
getInputFileCompoundPosForLine(unsigned int line)184 static compoundPos* getInputFileCompoundPosForLine (unsigned int line)
185 {
186 int index;
187 if (line > 0)
188 {
189 if (File.lineFposMap.count > (line - 1))
190 index = line - 1;
191 else if (File.lineFposMap.count != 0)
192 index = File.lineFposMap.count - 1;
193 else
194 index = 0;
195 }
196 else
197 index = 0;
198
199 return File.lineFposMap.pos + index;
200 }
201
getInputFilePositionForLine(unsigned int line)202 extern MIOPos getInputFilePositionForLine (unsigned int line)
203 {
204 compoundPos *cpos = getInputFileCompoundPosForLine (line);
205 return cpos->pos;
206 }
207
getInputFileOffsetForLine(unsigned int line)208 extern long getInputFileOffsetForLine (unsigned int line)
209 {
210 compoundPos *cpos = getInputFileCompoundPosForLine (line);
211 return cpos->offset - (File.bomFound? 3: 0);
212 }
213
getInputLanguage(void)214 extern langType getInputLanguage (void)
215 {
216 return langStackTop (&inputLang.stack);
217 }
218
getInputLanguageName(void)219 extern const char *getInputLanguageName (void)
220 {
221 return getLanguageName (getInputLanguage());
222 }
223
getInputFileTagPath(void)224 extern const char *getInputFileTagPath (void)
225 {
226 return vStringValue (File.input.tagPath);
227 }
228
/* Returns true if `lang' is the current input language. */
extern bool isInputLanguage (langType lang)
{
	const langType current = getInputLanguage ();

	return (bool)(current == lang);
}
233
isInputHeaderFile(void)234 extern bool isInputHeaderFile (void)
235 {
236 return File.input.isHeader;
237 }
238
isInputLanguageKindEnabled(int kindIndex)239 extern bool isInputLanguageKindEnabled (int kindIndex)
240 {
241 return isLanguageKindEnabled (getInputLanguage (), kindIndex);
242 }
243
isInputLanguageRoleEnabled(int kindIndex,int roleIndex)244 extern bool isInputLanguageRoleEnabled (int kindIndex, int roleIndex)
245 {
246 return isLanguageRoleEnabled (getInputLanguage (),
247 kindIndex, roleIndex);
248 }
249
countInputLanguageKinds(void)250 extern unsigned int countInputLanguageKinds (void)
251 {
252 return countLanguageKinds (getInputLanguage ());
253 }
254
countInputLanguageRoles(int kindIndex)255 extern unsigned int countInputLanguageRoles (int kindIndex)
256 {
257 return countLanguageRoles (getInputLanguage (), kindIndex);
258 }
259
doesInputLanguageAllowNullTag(void)260 extern bool doesInputLanguageAllowNullTag (void)
261 {
262 return doesLanguageAllowNullTag (getInputLanguage ());
263 }
264
doesInputLanguageRequestAutomaticFQTag(const tagEntryInfo * e)265 extern bool doesInputLanguageRequestAutomaticFQTag (const tagEntryInfo *e)
266 {
267 return doesLanguageRequestAutomaticFQTag (e->langType);
268 }
269
getSourceFileTagPath(void)270 extern const char *getSourceFileTagPath (void)
271 {
272 return vStringValue (File.source.tagPath);
273 }
274
getSourceLanguage(void)275 extern langType getSourceLanguage (void)
276 {
277 return sourceLang;
278 }
279
getSourceLineNumber(void)280 extern unsigned long getSourceLineNumber (void)
281 {
282 return File.source.lineNumber;
283 }
284
/* Deletes the `name' and `tagPath' strings of `finfo' (when set) and
 * resets both members to NULL.
 */
static void freeInputFileInfo (inputFileInfo *finfo)
{
	vString **const members [] = { &finfo->name, &finfo->tagPath };
	unsigned int i;

	for (i = 0; i < 2; i++)
	{
		if (*members [i] == NULL)
			continue;
		vStringDelete (*members [i]);
		*members [i] = NULL;
	}
}
298
freeInputFileResources(void)299 extern void freeInputFileResources (void)
300 {
301 if (File.path != NULL)
302 vStringDelete (File.path);
303 if (File.line != NULL)
304 vStringDelete (File.line);
305 freeInputFileInfo (&File.input);
306 freeInputFileInfo (&File.source);
307 }
308
getInputFileData(size_t * size)309 extern const unsigned char *getInputFileData (size_t *size)
310 {
311 return mio_memory_get_data (File.mio, size);
312 }
313
314 /*
315 * inputLineFposMap related functions
316 */
/* Frees the entries of the line map and resets it to the empty state.
 * Does nothing when the map holds no storage.
 */
static void freeLineFposMap (inputLineFposMap *lineFposMap)
{
	if (lineFposMap->pos == NULL)
		return;

	eFree (lineFposMap->pos);
	lineFposMap->pos = NULL;
	lineFposMap->size = 0;
	lineFposMap->count = 0;
}
327
/* Gives the line map its initial storage (INITIAL_lineFposMap_LEN
 * entries) and marks it as holding no entries.
 */
static void allocLineFposMap (inputLineFposMap *lineFposMap)
{
#define INITIAL_lineFposMap_LEN 256
	lineFposMap->count = 0;
	lineFposMap->size = INITIAL_lineFposMap_LEN;
	lineFposMap->pos = xCalloc (INITIAL_lineFposMap_LEN, compoundPos);
}
335
/* Appends a copy of `pos' to the line map, doubling the storage when it is
 * full. The new entry becomes the only `open' one, and its crAdjustment is
 * the previous entry's value plus one when `crAdjustment' is true.
 */
static void appendLineFposMap (inputLineFposMap *lineFposMap, compoundPos *pos,
							   bool crAdjustment)
{
	int inheritedCrAdjustment = 0;
	compoundPos *slot;

	if (lineFposMap->count == lineFposMap->size)
	{
		lineFposMap->size *= 2;
		lineFposMap->pos = xRealloc (lineFposMap->pos,
					     lineFposMap->size,
					     compoundPos);
	}

	if (lineFposMap->count != 0)
	{
		/* The previous entry is no longer the last one. */
		compoundPos *const previous = &lineFposMap->pos [lineFposMap->count - 1];

		previous->open = false;
		inheritedCrAdjustment = previous->crAdjustment;
	}

	slot = &lineFposMap->pos [lineFposMap->count];
	*slot = *pos;
	slot->open = true;
	slot->crAdjustment = inheritedCrAdjustment + ((crAdjustment)? 1: 0);
	lineFposMap->count++;
}
361
compoundPosForOffset(const void * oft,const void * p)362 static int compoundPosForOffset (const void* oft, const void *p)
363 {
364 long offset = *(long *)oft;
365 const compoundPos *pos = p;
366 const compoundPos *next = (compoundPos *)(((char *)pos) + sizeof (compoundPos));
367
368 if (offset < (pos->offset - pos->crAdjustment))
369 return -1;
370 else if (((pos->offset - pos->crAdjustment) <= offset)
371 && (pos->open
372 || (offset < (next->offset - next->crAdjustment))))
373 return 0;
374 else
375 return 1;
376 }
377
getInputLineNumberForFileOffset(long offset)378 extern unsigned long getInputLineNumberForFileOffset(long offset)
379 {
380 compoundPos *p;
381
382 if (File.bomFound)
383 offset += 3;
384
385 p = bsearch (&offset, File.lineFposMap.pos, File.lineFposMap.count, sizeof (compoundPos),
386 compoundPosForOffset);
387 if (p == NULL)
388 return 1; /* TODO: 0? */
389 else
390 return 1 + (p - File.lineFposMap.pos);
391 }
392
393 /*
394 * Input file access functions
395 */
396
setOwnerDirectoryOfInputFile(const char * const fileName)397 static void setOwnerDirectoryOfInputFile (const char *const fileName)
398 {
399 const char *const head = fileName;
400 const char *const tail = baseFilename (head);
401
402 if (File.path != NULL)
403 vStringDelete (File.path);
404 if (tail == head)
405 File.path = NULL;
406 else
407 {
408 const size_t length = tail - head - 1;
409 File.path = vStringNew ();
410 vStringNCopyS (File.path, fileName, length);
411 }
412 }
413
/* Stores `fileName' as the name of `finfo' (taking over the vString) and
 * recomputes tagPath and isHeader from it.
 *
 * A previous tagPath is handed to `holder' when one is given, otherwise it
 * is deleted. The form of the new tagPath depends on Option.tagRelative:
 * absolute for TREL_NEVER, relative to the tag file directory for
 * TREL_ALWAYS, a plain copy for TREL_NO or for a name that is already
 * absolute, and relative in the remaining case.
 * `language' is not used here.
 */
static void setInputFileParametersCommon (inputFileInfo *finfo, vString *const fileName,
					  const langType language,
					  stringList *holder)
{
	const char *const path = vStringValue (fileName);
	vString *tagPath;

	if (finfo->name != NULL)
		vStringDelete (finfo->name);
	finfo->name = fileName;

	if (finfo->tagPath != NULL)
	{
		if (holder)
			stringListAdd (holder, finfo->tagPath);
		else
			vStringDelete (finfo->tagPath);
	}

	if (Option.tagRelative == TREL_NEVER)
		tagPath = vStringNewOwn (absoluteFilename (path));
	else if (Option.tagRelative == TREL_ALWAYS
		 || (Option.tagRelative != TREL_NO && !isAbsolutePath (path)))
		tagPath = vStringNewOwn (relativeFilename (path,
							   getTagFileDirectory ()));
	else
		tagPath = vStringNewCopy (fileName);
	finfo->tagPath = tagPath;

	finfo->isHeader = isIncludeFile (path);
}
448
/* Empties the language stack of `langInfo' and pushes `lang' as its only
 * entry. The stack is expected to be non-empty on entry.
 */
static void resetLangOnStack (inputLangInfo *langInfo, langType lang)
{
	langStack *const stack = &langInfo->stack;

	Assert (langInfo->stack.count > 0);
	langStackClear (stack);
	langStackPush (stack, lang);
}
455
baseLangOnStack(inputLangInfo * langInfo)456 extern langType baseLangOnStack (inputLangInfo *langInfo)
457 {
458 Assert (langInfo->stack.count > 0);
459 return langStackBotom (& (langInfo->stack));
460 }
461
/* Pushes `lang' onto the language stack of `langInfo'. */
static void pushLangOnStack (inputLangInfo *langInfo, langType lang)
{
	langStack *const stack = &langInfo->stack;

	langStackPush (stack, lang);
}
466
popLangOnStack(inputLangInfo * langInfo)467 static langType popLangOnStack (inputLangInfo *langInfo)
468 {
469 return langStackPop (& langInfo->stack);
470 }
471
/* Clears the language stack of `langInfo'. */
static void clearLangOnStack (inputLangInfo *langInfo)
{
	langStack *const stack = &langInfo->stack;

	langStackClear (stack);
}
476
/* Records `fileName' for the input file (no tagPath holder is used) and
 * pushes `language' onto the input language stack.
 */
static void setInputFileParameters (vString *const fileName, const langType language)
{
	inputFileInfo *const info = &File.input;

	setInputFileParametersCommon (info, fileName, language, NULL);
	pushLangOnStack(&inputLang, language);
}
483
/* Records `fileName' for the source file, parking a previous tagPath in
 * File.sourceTagPathHolder, and remembers `language' as the source language.
 */
static void setSourceFileParameters (vString *const fileName, const langType language)
{
	inputFileInfo *const info = &File.source;
	stringList *const holder = File.sourceTagPathHolder;

	setInputFileParametersCommon (info, fileName, language, holder);
	sourceLang = language;
}
490
/* Switches the source file to `fileName' when a language can be determined
 * for it. A relative name is combined with File.path when that is set.
 * Returns true if the source file was switched, false if the language
 * lookup yielded LANG_IGNORE.
 */
static bool setSourceFileName (vString *const fileName)
{
	const char *const name = vStringValue (fileName);
	const langType language = getLanguageForFilenameAndContents (name);
	vString *pathName;

	if (language == LANG_IGNORE)
		return false;

	if (isAbsolutePath (name) || File.path == NULL)
		pathName = vStringNewCopy (fileName);
	else
		pathName = vStringNewOwn (
			combinePathAndFile (vStringValue (File.path), name));

	setSourceFileParameters (pathName, language);
	return true;
}
511
512 /*
513 * Line directive parsing
514 */
515
/* Advances `*str' past any leading spaces and tabs. */
static void skipWhite (char **str)
{
	char *cursor = *str;

	for (;;)
	{
		if (*cursor != ' ' && *cursor != '\t')
			break;
		cursor++;
	}
	*str = cursor;
}
521
/* Reads a decimal line number from `*str' after skipping leading spaces
 * and tabs, and leaves `*str' pointing just past the digits.
 *
 * Returns the number read, or 0 when the digits are not immediately
 * followed by a space or a tab (which includes the case of no digits).
 */
static unsigned long readLineNumber (char **str)
{
	char *s;
	unsigned long lNum = 0;

	skipWhite (str);
	s = *str;
	/* The <ctype.h> functions are only defined for values representable
	 * as unsigned char (or EOF); a plain `char' holding a byte >= 0x80
	 * may be negative, so convert before calling isdigit ().
	 */
	while (*s != '\0' && isdigit ((unsigned char) *s))
	{
		lNum = (lNum * 10) + (*s - '0');
		s++;
	}
	if (*s != ' ' && *s != '\t')
		lNum = 0;
	*str = s;

	return lNum;
}
540
541 /* While ANSI only permits lines of the form:
542 * # line n "filename"
543 * Earlier compilers generated lines of the form
544 * # n filename
545 * GNU C will output lines of the form:
546 * # n "filename"
547 * So we need to be fairly flexible in what we accept.
548 */
readFileName(char * s)549 static vString *readFileName (char *s)
550 {
551 vString *const fileName = vStringNew ();
552 bool quoteDelimited = false;
553
554 skipWhite (&s);
555 if (*s == '"')
556 {
557 s++; /* skip double-quote */
558 quoteDelimited = true;
559 }
560 while (*s != '\0' && *s != '\n' &&
561 (quoteDelimited ? (*s != '"') : (*s != ' ' && *s != '\t')))
562 {
563 vStringPut (fileName, *s);
564 s++;
565 }
566 vStringPut (fileName, '\0');
567
568 return fileName;
569 }
570
parseLineDirective(char * s)571 static bool parseLineDirective (char *s)
572 {
573 bool result = false;
574
575 skipWhite (&s);
576 DebugStatement ( const char* lineStr = ""; )
577
578 if (isdigit (*s))
579 result = true;
580 else if (strncmp (s, "line", 4) == 0)
581 {
582 s += 4;
583 if (*s == ' ' || *s == '\t')
584 {
585 DebugStatement ( lineStr = "line"; )
586 result = true;
587 }
588 }
589 if (result)
590 {
591 const unsigned long lNum = readLineNumber (&s);
592 if (lNum == 0)
593 result = false;
594 else
595 {
596 vString *const fileName = readFileName (s);
597 if (vStringLength (fileName) == 0)
598 {
599 File.source.lineNumber = lNum - 1; /* applies to NEXT line */
600 DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld", lineStr, lNum); )
601 }
602 else if (setSourceFileName (fileName))
603 {
604 File.source.lineNumber = lNum - 1; /* applies to NEXT line */
605 DebugStatement ( debugPrintf (DEBUG_RAW, "#%s %ld \"%s\"",
606 lineStr, lNum, vStringValue (fileName)); )
607 }
608
609 if (vStringLength (fileName) > 0 &&
610 lNum == 1)
611 makeFileTag (vStringValue (fileName));
612 vStringDelete (fileName);
613 result = true;
614 }
615 }
616 return result;
617 }
618
/*
*   Input file I/O operations
*/

/* Largest file size, in bytes, that getMioFull () loads into a memory
 * stream when the parser does not require one. With DEBUG defined the
 * limit is 0, so getMioFull () opens every file as a file stream unless a
 * memory stream is required.
 */
#ifdef DEBUG
#define MAX_IN_MEMORY_FILE_SIZE 0
#else
#define MAX_IN_MEMORY_FILE_SIZE (1024*1024)
#endif
627
getMioFull(const char * const fileName,const char * const openMode,bool memStreamRequired,time_t * mtime)628 static MIO *getMioFull (const char *const fileName, const char *const openMode,
629 bool memStreamRequired, time_t *mtime)
630 {
631 FILE *src;
632 fileStatus *st;
633 unsigned long size;
634 unsigned char *data;
635
636 st = eStat (fileName);
637 size = st->size;
638 if (mtime)
639 *mtime = st->mtime;
640 eStatFree (st);
641 if ((!memStreamRequired)
642 && (size > MAX_IN_MEMORY_FILE_SIZE || size == 0))
643 return mio_new_file (fileName, openMode);
644
645 src = fopen (fileName, openMode);
646 if (!src)
647 return NULL;
648
649 data = eMalloc (size);
650 if (fread (data, 1, size, src) != size)
651 {
652 eFree (data);
653 fclose (src);
654 if (memStreamRequired)
655 return NULL;
656 else
657 return mio_new_file (fileName, openMode);
658 }
659 fclose (src);
660 return mio_new_memory (data, size, eRealloc, eFreeNoNullCheck);
661 }
662
getMio(const char * const fileName,const char * const openMode,bool memStreamRequired)663 extern MIO *getMio (const char *const fileName, const char *const openMode,
664 bool memStreamRequired)
665 {
666 return getMioFull (fileName, openMode, memStreamRequired, NULL);
667 }
668
669 /* Return true if utf8 BOM is found */
checkUTF8BOM(MIO * mio,bool skipIfFound)670 static bool checkUTF8BOM (MIO *mio, bool skipIfFound)
671 {
672 bool r = false;
673 if ((0xEF == mio_getc (mio))
674 && (0xBB == mio_getc (mio))
675 && (0xBF == mio_getc (mio)))
676 r = true;
677
678 if (! (r && skipIfFound))
679 mio_rewind (mio);
680 return r;
681 }
682
/* Rewinds the stream of `f'. When a BOM was found for `f', its three bytes
 * are consumed again so that reading resumes right after it.
 */
static void rewindInputFile (inputFile *f)
{
	int c CTAGS_ATTR_UNUSED;

	mio_rewind (f->mio);
	if (!f->bomFound)
		return;

	c = mio_getc (f->mio);
	Assert (c == 0xEF);
	c = mio_getc (f->mio);
	Assert (c == 0xBB);
	c = mio_getc (f->mio);
	Assert (c == 0xBF);
}
698
699 /* This function opens an input file, and resets the line counter. If it
700 * fails, it will display an error message and leave the File.mio set to NULL.
701 */
openInputFile(const char * const fileName,const langType language,MIO * mio,time_t mtime)702 extern bool openInputFile (const char *const fileName, const langType language,
703 MIO *mio, time_t mtime)
704 {
705 const char *const openMode = "rb";
706 bool opened = false;
707 bool memStreamRequired;
708
709 /* If another file was already open, then close it.
710 */
711 if (File.mio != NULL)
712 {
713 mio_unref (File.mio); /* close any open input file */
714 File.mio = NULL;
715 }
716
717 /* File position is used as key for checking the availability of
718 pattern cache in entry.h. If an input file is changed, the
719 key is meaningless. So notifying the changing here. */
720 invalidatePatternCache();
721
722 if (File.sourceTagPathHolder == NULL)
723 {
724 File.sourceTagPathHolder = stringListNew ();
725 DEFAULT_TRASH_BOX(File.sourceTagPathHolder, stringListDelete);
726 }
727 stringListClear (File.sourceTagPathHolder);
728
729 memStreamRequired = doesParserRequireMemoryStream (language);
730
731 if (mio)
732 {
733 size_t tmp;
734 if (memStreamRequired && (!mio_memory_get_data (mio, &tmp)))
735 mio = NULL;
736 else
737 mio_rewind (mio);
738 }
739
740 File.mio = mio? mio_ref (mio): getMioFull (fileName, openMode, memStreamRequired, &File.mtime);
741
742 if (File.mio == NULL)
743 error (WARNING | PERROR, "cannot open \"%s\"", fileName);
744 else
745 {
746 opened = true;
747
748 if (File.mio == mio)
749 File.mtime = mtime;
750
751 File.bomFound = checkUTF8BOM (File.mio, true);
752
753 setOwnerDirectoryOfInputFile (fileName);
754 mio_getpos (File.mio, &StartOfLine.pos);
755 mio_getpos (File.mio, &File.filePosition.pos);
756 File.filePosition.offset = StartOfLine.offset = mio_tell (File.mio);
757 File.currentLine = NULL;
758
759 File.line = vStringNewOrClear (File.line);
760 File.ungetchIdx = 0;
761
762 setInputFileParameters (vStringNewInit (fileName), language);
763 File.input.lineNumberOrigin = 0L;
764 File.input.lineNumber = File.input.lineNumberOrigin;
765 setSourceFileParameters (vStringNewInit (fileName), language);
766 File.source.lineNumberOrigin = 0L;
767 File.source.lineNumber = File.source.lineNumberOrigin;
768 allocLineFposMap (&File.lineFposMap);
769
770 File.thinDepth = 0;
771 verbose ("OPENING%s %s as %s language %sfile [%s%s]\n",
772 (File.bomFound? "(skipping utf-8 bom)": ""),
773 fileName,
774 getLanguageName (language),
775 File.input.isHeader ? "include " : "",
776 mio? "reused": "new",
777 memStreamRequired? ",required": "");
778 }
779 return opened;
780 }
781
getInputFileMtime(void)782 extern time_t getInputFileMtime (void)
783 {
784 return File.mtime;
785 }
786
resetInputFile(const langType language)787 extern void resetInputFile (const langType language)
788 {
789 Assert (File.mio);
790
791 rewindInputFile (&File);
792 mio_getpos (File.mio, &StartOfLine.pos);
793 mio_getpos (File.mio, &File.filePosition.pos);
794 File.filePosition.offset = StartOfLine.offset = mio_tell (File.mio);
795 File.currentLine = NULL;
796
797 Assert (File.line);
798 vStringClear (File.line);
799 File.ungetchIdx = 0;
800
801 if (hasLanguageMultilineRegexPatterns (language))
802 File.allLines = vStringNew ();
803
804 resetLangOnStack (& inputLang, language);
805 File.input.lineNumber = File.input.lineNumberOrigin;
806 sourceLang = language;
807 File.source.lineNumber = File.source.lineNumberOrigin;
808 }
809
closeInputFile(void)810 extern void closeInputFile (void)
811 {
812 if (File.mio != NULL)
813 {
814 clearLangOnStack (& inputLang);
815
816 /* The line count of the file is 1 too big, since it is one-based
817 * and is incremented upon each newline.
818 */
819 if (Option.printTotals)
820 {
821 fileStatus *status = eStat (vStringValue (File.input.name));
822 addTotals (0, File.input.lineNumber - 1L, status->size);
823 }
824 mio_unref (File.mio);
825 File.mio = NULL;
826 freeLineFposMap (&File.lineFposMap);
827 }
828 }
829
getInputFileUserData(void)830 extern void *getInputFileUserData(void)
831 {
832 return mio_get_user_data (File.mio);
833 }
834
835 /* Action to take for each encountered input newline.
836 */
fileNewline(bool crAdjustment)837 static void fileNewline (bool crAdjustment)
838 {
839 File.filePosition = StartOfLine;
840
841 if (BackupFile.mio == NULL)
842 appendLineFposMap (&File.lineFposMap, &File.filePosition,
843 crAdjustment);
844
845 File.input.lineNumber++;
846 File.source.lineNumber++;
847 DebugStatement ( if (Option.breakLine == File.input.lineNumber) lineBreak (); )
848 DebugStatement ( debugPrintf (DEBUG_RAW, "%6ld: ", File.input.lineNumber); )
849 }
850
ungetcToInputFile(int c)851 extern void ungetcToInputFile (int c)
852 {
853 const size_t len = ARRAY_SIZE (File.ungetchBuf);
854
855 Assert (File.ungetchIdx < len);
856 /* we cannot rely on the assertion that might be disabled in non-debug mode */
857 if (File.ungetchIdx < len)
858 File.ungetchBuf[File.ungetchIdx++] = c;
859 }
860
/* How the line most recently read by readLine () ended. */
typedef enum eEolType {
	eol_eof = 0,	/* end of file was reached while reading the line */
	eol_nl,		/* the line ended with LF (also readLine's initial value) */
	eol_cr_nl,	/* the line ended with CR-LF, which readLine turned into LF */
} eolType;
866
readLine(vString * const vLine,MIO * const mio)867 static eolType readLine (vString *const vLine, MIO *const mio)
868 {
869 char *str;
870 size_t size;
871 eolType r = eol_nl;
872
873 vStringClear (vLine);
874
875 str = vStringValue (vLine);
876 size = vStringSize (vLine);
877
878 for (;;)
879 {
880 bool newLine;
881 bool eof;
882
883 if (mio_gets (mio, str, size) == NULL)
884 {
885 if (!mio_eof (mio))
886 error (FATAL | PERROR, "Failure on attempt to read file");
887 }
888 vStringSetLength (vLine);
889 newLine = vStringLength (vLine) > 0 && vStringLast (vLine) == '\n';
890 eof = mio_eof (mio);
891 if (eof)
892 r = eol_eof;
893
894 /* Turn line breaks into a canonical form. The three commonly
895 * used forms of line breaks are: LF (UNIX/Mac OS X), CR-LF (MS-DOS) and
896 * CR (Mac OS 9). As CR-only EOL isn't handled by gets() and Mac OS 9
897 * is dead, we only handle CR-LF EOLs and convert them into LF. */
898 if (newLine && vStringLength (vLine) > 1 &&
899 vStringChar (vLine, vStringLength (vLine) - 2) == '\r')
900 {
901 vStringChar (vLine, vStringLength (vLine) - 2) = '\n';
902 vStringChop (vLine);
903 r = eol_cr_nl;
904 }
905
906 if (newLine || eof)
907 break;
908
909 vStringResize (vLine, vStringLength (vLine) * 2);
910 str = vStringValue (vLine) + vStringLength (vLine);
911 size = vStringSize (vLine) - vStringLength (vLine);
912 }
913 return r;
914 }
915
iFileGetLine(bool chop_newline)916 static vString *iFileGetLine (bool chop_newline)
917 {
918 eolType eol;
919 langType lang = getInputLanguage();
920
921 Assert (File.line);
922 eol = readLine (File.line, File.mio);
923
924 if (vStringLength (File.line) > 0)
925 {
926 /* Use StartOfLine from previous iFileGetLine() call */
927 fileNewline (eol == eol_cr_nl);
928 /* Store StartOfLine for the next iFileGetLine() call */
929 mio_getpos (File.mio, &StartOfLine.pos);
930 StartOfLine.offset = mio_tell (File.mio);
931
932 if (Option.lineDirectives && vStringChar (File.line, 0) == '#')
933 parseLineDirective (vStringValue (File.line) + 1);
934
935 if (File.allLines)
936 vStringCat (File.allLines, File.line);
937
938 bool chopped = vStringStripNewline (File.line);
939
940 matchLanguageRegex (lang, File.line);
941
942 if (chopped && !chop_newline)
943 vStringPutNewlinAgainUnsafe (File.line);
944
945 return File.line;
946 }
947 else
948 {
949 if (File.allLines)
950 {
951 matchLanguageMultilineRegex (lang, File.allLines);
952 matchLanguageMultitableRegex (lang, File.allLines);
953
954 /* To limit the execution of multiline/multitable parser(s) only
955 ONCE, clear File.allLines field. */
956 vStringDelete (File.allLines);
957 File.allLines = NULL;
958 }
959 return NULL;
960 }
961 }
962
963 /* Do not mix use of readLineFromInputFile () and getcFromInputFile () for the same file.
964 */
getcFromInputFile(void)965 extern int getcFromInputFile (void)
966 {
967 int c;
968
969 /* If there is an ungotten character, then return it. Don't do any
970 * other processing on it, though, because we already did that the
971 * first time it was read through getcFromInputFile ().
972 */
973 if (File.ungetchIdx > 0)
974 {
975 c = File.ungetchBuf[--File.ungetchIdx];
976 return c; /* return here to avoid re-calling debugPutc () */
977 }
978 do
979 {
980 if (File.currentLine != NULL)
981 {
982 c = *File.currentLine++;
983 if (c == '\0')
984 File.currentLine = NULL;
985 }
986 else
987 {
988 vString* const line = iFileGetLine (false);
989 if (line != NULL)
990 File.currentLine = (unsigned char*) vStringValue (line);
991 if (File.currentLine == NULL)
992 c = EOF;
993 else
994 c = '\0';
995 }
996 } while (c == '\0');
997 DebugStatement ( debugPutc (DEBUG_READ, c); )
998 return c;
999 }
1000
1001 /* returns the nth previous character (0 meaning current), or def if nth cannot
1002 * be accessed. Note that this can't access previous line data. */
getNthPrevCFromInputFile(unsigned int nth,int def)1003 extern int getNthPrevCFromInputFile (unsigned int nth, int def)
1004 {
1005 const unsigned char *base = (unsigned char *) vStringValue (File.line);
1006 const unsigned int offset = File.ungetchIdx + 1 + nth;
1007
1008 if (File.currentLine != NULL && File.currentLine >= base + offset)
1009 return (int) *(File.currentLine - offset);
1010 else
1011 return def;
1012 }
1013
/* Reads characters until `c' or EOF is read, and returns the character
 * that ended the search.
 */
extern int skipToCharacterInInputFile (int c)
{
	int d = getcFromInputFile ();

	while (d != EOF && d != c)
		d = getcFromInputFile ();

	return d;
}
1023
/* Skips forward until `c0' is followed, possibly after further `c0's, by
 * `c1', or until EOF. Returns the last character read (`c1' or EOF).
 */
extern int skipToCharacterInInputFile2 (int c0, int c1)
{
	int d;

	for (;;)
	{
		skipToCharacterInInputFile(c0);

		/* Swallow a run of c0 characters. */
		d = getcFromInputFile ();
		while (d == c0 && d != c1)
			d = getcFromInputFile ();

		if (d == EOF || d == c1)
			break;
	}
	return d;
}
1036
1037 /* An alternative interface to getcFromInputFile (). Do not mix use of readLineFromInputFile()
1038 * and getcFromInputFile() for the same file. The returned string does not contain
1039 * the terminating newline. A NULL return value means that all lines in the
1040 * file have been read and we are at the end of file.
1041 */
readLineFromInputFile(void)1042 extern const unsigned char *readLineFromInputFile (void)
1043 {
1044 vString* const line = iFileGetLine (true);
1045 const unsigned char* result = NULL;
1046 if (line != NULL)
1047 {
1048 result = (const unsigned char*) vStringValue (line);
1049 DebugStatement ( debugPrintf (DEBUG_READ, "%s\n", result); )
1050 }
1051 return result;
1052 }
1053
1054 /*
1055 * Raw file line reading with automatic buffer sizing
1056 */
readLineRaw(vString * const vLine,MIO * const mio)1057 extern char *readLineRaw (vString *const vLine, MIO *const mio)
1058 {
1059 if (mio == NULL) /* to free memory allocated to buffer */
1060 error (FATAL, "NULL file pointer");
1061 else
1062 {
1063 readLine (vLine, mio);
1064
1065 #ifdef HAVE_ICONV
1066 if (isConverting ())
1067 convertString (vLine);
1068 #endif
1069 }
1070 return vStringLength (vLine) > 0 ? vStringValue (vLine) : NULL;
1071 }
1072
1073 /* Places into the line buffer the contents of the line referenced by
1074 * "location".
1075 */
readLineFromBypass(vString * const vLine,MIOPos location,long * const pSeekValue)1076 extern char *readLineFromBypass (
1077 vString *const vLine, MIOPos location, long *const pSeekValue)
1078 {
1079 MIOPos orignalPosition;
1080 char *result;
1081
1082 mio_getpos (File.mio, &orignalPosition);
1083 mio_setpos (File.mio, &location);
1084 mio_clearerr (File.mio);
1085 if (pSeekValue != NULL)
1086 *pSeekValue = mio_tell (File.mio);
1087 result = readLineRaw (vLine, File.mio);
1088 mio_setpos (File.mio, &orignalPosition);
1089 /* If the file is empty, we can't get the line
1090 for location 0. readLineFromBypass doesn't know
1091 what itself should do; just report it to the caller. */
1092 return result;
1093 }
1094
pushNarrowedInputStream(bool useMemoryStreamInput,unsigned long startLine,long startCharOffset,unsigned long endLine,long endCharOffset,unsigned long sourceLineOffset,int promise)1095 extern void pushNarrowedInputStream (
1096 bool useMemoryStreamInput,
1097 unsigned long startLine, long startCharOffset,
1098 unsigned long endLine, long endCharOffset,
1099 unsigned long sourceLineOffset,
1100 int promise)
1101 {
1102 long p, q;
1103 MIOPos original;
1104 MIOPos tmp;
1105 MIO *subio;
1106
1107 if (isThinStreamSpec (startLine, startCharOffset,
1108 endLine, endCharOffset,
1109 sourceLineOffset))
1110 {
1111 if ((!useMemoryStreamInput
1112 || mio_memory_get_data (File.mio, NULL)))
1113 {
1114 File.thinDepth++;
1115 verbose ("push thin stream (%d)\n", File.thinDepth);
1116 return;
1117 }
1118 error(WARNING, "INTERNAL ERROR: though pushing thin MEMORY stream, "
1119 "underlying input stream is a FILE stream: %s@%s",
1120 vStringValue (File.input.name), vStringValue (File.input.tagPath));
1121 AssertNotReached ();
1122 }
1123 Assert (File.thinDepth == 0);
1124
1125 original = getInputFilePosition ();
1126
1127 tmp = getInputFilePositionForLine (startLine);
1128 mio_setpos (File.mio, &tmp);
1129 mio_seek (File.mio, startCharOffset, SEEK_CUR);
1130 p = mio_tell (File.mio);
1131
1132 tmp = getInputFilePositionForLine (endLine);
1133 mio_setpos (File.mio, &tmp);
1134 mio_seek (File.mio, endCharOffset, SEEK_CUR);
1135 q = mio_tell (File.mio);
1136
1137 mio_setpos (File.mio, &original);
1138
1139 invalidatePatternCache();
1140
1141 size_t size = q - p;
1142 subio = mio_new_mio (File.mio, p, size);
1143 if (subio == NULL)
1144 error (FATAL, "memory for mio may be exhausted");
1145
1146 runModifiers (promise,
1147 startLine, startCharOffset,
1148 endLine, endCharOffset,
1149 mio_memory_get_data (subio, NULL),
1150 size);
1151
1152 BackupFile = File;
1153
1154 File.mio = subio;
1155 File.bomFound = false;
1156 File.nestedInputStreamInfo.startLine = startLine;
1157 File.nestedInputStreamInfo.startCharOffset = startCharOffset;
1158 File.nestedInputStreamInfo.endLine = endLine;
1159 File.nestedInputStreamInfo.endCharOffset = endCharOffset;
1160
1161 File.input.lineNumberOrigin = ((startLine == 0)? 0: startLine - 1);
1162 File.source.lineNumberOrigin = ((sourceLineOffset == 0)? 0: sourceLineOffset - 1);
1163 }
1164
doesParserRunAsGuest(void)1165 extern bool doesParserRunAsGuest (void)
1166 {
1167 return !(File.nestedInputStreamInfo.startLine == 0
1168 && File.nestedInputStreamInfo.startCharOffset == 0
1169 && File.nestedInputStreamInfo.endLine == 0
1170 && File.nestedInputStreamInfo.endCharOffset == 0);
1171 }
1172
getNestedInputBoundaryInfo(unsigned long lineNumber)1173 extern unsigned int getNestedInputBoundaryInfo (unsigned long lineNumber)
1174 {
1175 unsigned int info;
1176
1177 if (!doesParserRunAsGuest())
1178 /* Not in a nested input stream */
1179 return 0;
1180
1181 info = 0;
1182 if (File.nestedInputStreamInfo.startLine == lineNumber
1183 && File.nestedInputStreamInfo.startCharOffset != 0)
1184 info |= BOUNDARY_START;
1185 if (File.nestedInputStreamInfo.endLine == lineNumber
1186 && File.nestedInputStreamInfo.endCharOffset != 0)
1187 info |= BOUNDARY_END;
1188
1189 return info;
1190 }
popNarrowedInputStream(void)1191 extern void popNarrowedInputStream (void)
1192 {
1193 if (File.thinDepth)
1194 {
1195 File.thinDepth--;
1196 verbose ("CLEARING thin flag(%d)\n", File.thinDepth);
1197 return;
1198 }
1199 mio_unref (File.mio);
1200 File = BackupFile;
1201 memset (&BackupFile, 0, sizeof (BackupFile));
1202 }
1203
pushLanguage(const langType language)1204 extern void pushLanguage (const langType language)
1205 {
1206 pushLangOnStack (& inputLang, language);
1207 }
1208
popLanguage(void)1209 extern langType popLanguage (void)
1210 {
1211 return popLangOnStack (& inputLang);
1212 }
1213
getLanguageForBaseParser(void)1214 extern langType getLanguageForBaseParser (void)
1215 {
1216 return baseLangOnStack (& inputLang);
1217 }
1218
/* Sets up `langStack' as an empty stack with room for one language.
 * The address of the `languages' member is registered with
 * DEFAULT_TRASH_BOX using eFreeIndirect.
 */
static void langStackInit (langStack *langStack)
{
	/* Capacity first, then the storage sized from it. */
	langStack->size = 1;
	langStack->languages = xCalloc (langStack->size, langType);
	langStack->count = 0;

	DEFAULT_TRASH_BOX(&(langStack->languages), eFreeIndirect);
}
1226
langStackTop(langStack * langStack)1227 static langType langStackTop (langStack *langStack)
1228 {
1229 Assert (langStack->count > 0);
1230 return langStack->languages [langStack->count - 1];
1231 }
1232
langStackBotom(langStack * langStack)1233 static langType langStackBotom(langStack *langStack)
1234 {
1235 Assert (langStack->count > 0);
1236 return langStack->languages [0];
1237 }
1238
/* Empties `langStack' by popping until no entry is left. */
static void langStackClear (langStack *langStack)
{
	while (langStack->count != 0)
	{
		langStackPop (langStack);
	}
}
1244
/* Appends `type' to `langStack'.
 * A stack whose size is 0 is initialized with langStackInit () first; a full
 * stack is grown by exactly one slot with xRealloc ().
 */
static void langStackPush (langStack *langStack, langType type)
{
	if (langStack->size == 0)
	{
		langStackInit (langStack);
	}
	else if (langStack->count == langStack->size)
	{
		langStack->size++;
		langStack->languages = xRealloc (langStack->languages,
						 langStack->size, langType);
	}

	langStack->languages [langStack->count] = type;
	langStack->count++;
}
1254
langStackPop(langStack * langStack)1255 static langType langStackPop (langStack *langStack)
1256 {
1257 return langStack->languages [ -- langStack->count ];
1258 }
1259
/* Tells whether the given area specification is the "thin" one, i.e. whether
 * every one of the five values is zero. Returns false as soon as any value
 * is non-zero.
 */
extern bool isThinStreamSpec(unsigned long startLine, long startCharOffset,
			     unsigned long endLine, long endCharOffset,
			     unsigned long sourceLineOffset)
{
	/* Line-based members. */
	if (startLine != 0 || endLine != 0 || sourceLineOffset != 0)
		return false;

	/* Character offsets. */
	if (startCharOffset != 0 || endCharOffset != 0)
		return false;

	return true;
}
1270
#ifdef DO_TRACING
/* Tells whether tracing applies at this point.
 * With an input stream open (File.mio != NULL) a parser is given, so the
 * answer is whether the current input language is listed in the
 * --_trace=<LANG>,... option. Without one, no parser is given and the
 * answer is just whether the --_trace option is given.
 */
extern bool isTraced (void)
{
	if (File.mio != NULL)
		return isLanguageTraced (getInputLanguage ());

	return isMainTraced ();
}
#endif	/* DO_TRACING */
1284