xref: /Universal-ctags/parsers/bibtex.c (revision a5ce885dcffd63ba471c4f69c803b372b0af734c)
168f6cb9eSMirco Schönfeld /*
268f6cb9eSMirco Schönfeld  *	 Copyright (c) 2008, David Fishburn
368f6cb9eSMirco Schönfeld  *	 Copyright (c) 2012, Jan Larres
468f6cb9eSMirco Schönfeld  *	 Copyright (c) 2019, Mirco Schönfeld
568f6cb9eSMirco Schönfeld  *
668f6cb9eSMirco Schönfeld  *	 This source code is released for free distribution under the terms of the
768f6cb9eSMirco Schönfeld  *	 GNU General Public License version 2 or (at your option) any later version.
868f6cb9eSMirco Schönfeld  *
968f6cb9eSMirco Schönfeld  *	 This module contains functions for generating identifiers of entries of Bibtex language files.
1068f6cb9eSMirco Schönfeld  *
1168f6cb9eSMirco Schönfeld  *	 BibTex language "reference":
1268f6cb9eSMirco Schönfeld  *		 https://en.wikipedia.org/wiki/BibTeX
1368f6cb9eSMirco Schönfeld  */
1468f6cb9eSMirco Schönfeld 
1568f6cb9eSMirco Schönfeld /*
1668f6cb9eSMirco Schönfeld  *	 INCLUDE FILES
1768f6cb9eSMirco Schönfeld  */
1868f6cb9eSMirco Schönfeld #include "general.h"	/* must always come first */
1968f6cb9eSMirco Schönfeld #include <ctype.h>	/* to define isalpha () */
2068f6cb9eSMirco Schönfeld #include <string.h>
2168f6cb9eSMirco Schönfeld 
2268f6cb9eSMirco Schönfeld #include "debug.h"
2368f6cb9eSMirco Schönfeld #include "entry.h"
2468f6cb9eSMirco Schönfeld #include "keyword.h"
2568f6cb9eSMirco Schönfeld #include "parse.h"
2668f6cb9eSMirco Schönfeld #include "read.h"
2768f6cb9eSMirco Schönfeld #include "routines.h"
2868f6cb9eSMirco Schönfeld #include "vstring.h"
2968f6cb9eSMirco Schönfeld 
3068f6cb9eSMirco Schönfeld /*
3168f6cb9eSMirco Schönfeld  *	 MACROS
3268f6cb9eSMirco Schönfeld  */
/* True when the type of the token pointed to by "token" equals "t". */
#define isType(token,t)		(bool) ((token)->type == (t))
/* True when the keyword ID of the token pointed to by "token" equals "k". */
#define isKeyword(token,k)	(bool) ((token)->keyword == (k))
/*
 * Non-zero when "c" is a character this parser accepts inside a keyword or
 * an entry identifier: a letter, a digit, or one of '_', '-', '+', ':'.
 * NOTE: "c" is evaluated several times; pass an expression without side
 * effects.
 */
#define isIdentChar(c) \
	(isalpha (c) || isdigit (c) || (c) == '_' || (c) == '-' || (c) == '+' || (c) == ':')
3768f6cb9eSMirco Schönfeld 
3868f6cb9eSMirco Schönfeld /*
3968f6cb9eSMirco Schönfeld  *	 DATA DECLARATIONS
4068f6cb9eSMirco Schönfeld  */
4168f6cb9eSMirco Schönfeld 
/*
 * Identifies which BibTeX entry keyword a token matched.
 * These are the IDs stored in BibKeywordTable.
 */
enum eKeywordId {
	KEYWORD_article,
	KEYWORD_book,
	KEYWORD_booklet,
	KEYWORD_conference,
	KEYWORD_inbook,
	KEYWORD_incollection,
	KEYWORD_inproceedings,
	KEYWORD_manual,
	KEYWORD_mastersthesis,
	KEYWORD_misc,
	KEYWORD_phdthesis,
	KEYWORD_proceedings,
	KEYWORD_string,
	KEYWORD_techreport,
	KEYWORD_unpublished
};
/* Plain int rather than the enum so that KEYWORD_NONE can be stored too. */
typedef int keywordId;
6368f6cb9eSMirco Schönfeld 
/*
 * Token types.  A token that is a single character uses that character's
 * byte value (0..255) as its type; values from 256 upwards are the
 * multi-character token classes.
 */
enum eTokenType {
	/* 0..255 are the byte's value.  Some are named for convenience */
	TOKEN_OPEN_CURLY = '{',
	/* the values below are the special (non-byte) types */
	TOKEN_UNDEFINED = 256,
	TOKEN_KEYWORD,
	TOKEN_IDENTIFIER
};
/* Plain int so that both byte values and enum eTokenType values fit. */
typedef int tokenType;
7368f6cb9eSMirco Schönfeld 
7468f6cb9eSMirco Schönfeld typedef struct sTokenInfo {
7568f6cb9eSMirco Schönfeld 	tokenType		type;
7668f6cb9eSMirco Schönfeld 	keywordId		keyword;
7768f6cb9eSMirco Schönfeld 	vString *		string;
7868f6cb9eSMirco Schönfeld 	unsigned long 	lineNumber;
7968f6cb9eSMirco Schönfeld 	MIOPos 			filePosition;
8068f6cb9eSMirco Schönfeld } tokenInfo;
8168f6cb9eSMirco Schönfeld 
8268f6cb9eSMirco Schönfeld /*
8368f6cb9eSMirco Schönfeld  *	DATA DEFINITIONS
8468f6cb9eSMirco Schönfeld  */
8568f6cb9eSMirco Schönfeld 
8668f6cb9eSMirco Schönfeld static langType Lang_bib;
8768f6cb9eSMirco Schönfeld 
/*
 * Tag kinds, one per supported BibTeX entry type.  The values are used as
 * indices into BibKinds, so the two must be kept in the same order.
 * BIBTAG_COUNT is not a kind; it is the number of kinds.
 */
typedef enum {
	BIBTAG_ARTICLE,
	BIBTAG_BOOK,
	BIBTAG_BOOKLET,
	BIBTAG_CONFERENCE,
	BIBTAG_INBOOK,
	BIBTAG_INCOLLECTION,
	BIBTAG_INPROCEEDINGS,
	BIBTAG_MANUAL,
	BIBTAG_MASTERSTHESIS,
	BIBTAG_MISC,
	BIBTAG_PHDTHESIS,
	BIBTAG_PROCEEDINGS,
	BIBTAG_STRING,
	BIBTAG_TECHREPORT,
	BIBTAG_UNPUBLISHED,
	BIBTAG_COUNT
} bibKind;
10668f6cb9eSMirco Schönfeld 
10768f6cb9eSMirco Schönfeld static kindDefinition BibKinds [] = {
10868f6cb9eSMirco Schönfeld 	{ true,  'a', "article",				"article"				},
10968f6cb9eSMirco Schönfeld 	{ true,  'b', "book",						"book"					},
11068f6cb9eSMirco Schönfeld 	{ true,  'B', "booklet",				"booklet"				},
11168f6cb9eSMirco Schönfeld 	{ true,  'c', "conference",			"conference"		},
11268f6cb9eSMirco Schönfeld 	{ true,  'i', "inbook",					"inbook"				},
11368f6cb9eSMirco Schönfeld 	{ true,  'I', "incollection",		"incollection"	},
11468f6cb9eSMirco Schönfeld 	{ true,  'j', "inproceedings",	"inproceedings"	},
11568f6cb9eSMirco Schönfeld 	{ true,  'm', "manual",					"manual"				},
11668f6cb9eSMirco Schönfeld 	{ true,  'M', "mastersthesis",	"mastersthesis"	},
11768f6cb9eSMirco Schönfeld 	{ true,  'n', "misc",						"misc"					},
11868f6cb9eSMirco Schönfeld 	{ true,  'p', "phdthesis",			"phdthesis"			},
11968f6cb9eSMirco Schönfeld 	{ true,  'P', "proceedings",		"proceedings"		},
12067536c85SMirco Schoenfeld 	{ true,  's', "string",					"string"				},
12168f6cb9eSMirco Schönfeld 	{ true,  't', "techreport",			"techreport"		},
12268f6cb9eSMirco Schönfeld 	{ true,  'u', "unpublished",		"unpublished"		}
12368f6cb9eSMirco Schönfeld };
12468f6cb9eSMirco Schönfeld 
12568f6cb9eSMirco Schönfeld static const keywordTable BibKeywordTable [] = {
12668f6cb9eSMirco Schönfeld 	/* keyword			  keyword ID */
12768f6cb9eSMirco Schönfeld 	{ "article",	    KEYWORD_article				},
12868f6cb9eSMirco Schönfeld 	{ "book",	        KEYWORD_book				  },
12968f6cb9eSMirco Schönfeld 	{ "booklet",	    KEYWORD_booklet				},
13068f6cb9eSMirco Schönfeld 	{ "conference",	  KEYWORD_conference		},
13168f6cb9eSMirco Schönfeld 	{ "inbook",	      KEYWORD_inbook				},
13268f6cb9eSMirco Schönfeld 	{ "incollection",	KEYWORD_incollection	},
13368f6cb9eSMirco Schönfeld 	{ "inproceedings",KEYWORD_inproceedings	},
13468f6cb9eSMirco Schönfeld 	{ "manual",	      KEYWORD_manual				},
13568f6cb9eSMirco Schönfeld 	{ "mastersthesis",KEYWORD_mastersthesis	},
13668f6cb9eSMirco Schönfeld 	{ "misc",	        KEYWORD_misc				  },
13768f6cb9eSMirco Schönfeld 	{ "phdthesis",	  KEYWORD_phdthesis			},
13868f6cb9eSMirco Schönfeld 	{ "proceedings",	KEYWORD_proceedings		},
13967536c85SMirco Schoenfeld 	{ "string",				KEYWORD_string				},
14068f6cb9eSMirco Schönfeld 	{ "techreport",	  KEYWORD_techreport		},
14168f6cb9eSMirco Schönfeld 	{ "unpublished",	KEYWORD_unpublished		}
14268f6cb9eSMirco Schönfeld };
14368f6cb9eSMirco Schönfeld 
14468f6cb9eSMirco Schönfeld /*
14568f6cb9eSMirco Schönfeld  *	 FUNCTION DEFINITIONS
14668f6cb9eSMirco Schönfeld  */
14768f6cb9eSMirco Schönfeld 
newToken(void)14868f6cb9eSMirco Schönfeld static tokenInfo *newToken (void)
14968f6cb9eSMirco Schönfeld {
15068f6cb9eSMirco Schönfeld 	tokenInfo *const token = xMalloc (1, tokenInfo);
15168f6cb9eSMirco Schönfeld 
15268f6cb9eSMirco Schönfeld 	token->type			= TOKEN_UNDEFINED;
15368f6cb9eSMirco Schönfeld 	token->keyword		= KEYWORD_NONE;
15468f6cb9eSMirco Schönfeld 	token->string		= vStringNew ();
15568f6cb9eSMirco Schönfeld 	token->lineNumber   = getInputLineNumber ();
15668f6cb9eSMirco Schönfeld 	token->filePosition = getInputFilePosition ();
15768f6cb9eSMirco Schönfeld 
15868f6cb9eSMirco Schönfeld 	return token;
15968f6cb9eSMirco Schönfeld }
16068f6cb9eSMirco Schönfeld 
/* Frees a token created by newToken (): first its string buffer, then the token. */
static void deleteToken (tokenInfo *const token)
{
	vString *const text = token->string;

	vStringDelete (text);
	eFree (token);
}
16668f6cb9eSMirco Schönfeld 
16768f6cb9eSMirco Schönfeld /*
16868f6cb9eSMirco Schönfeld  *	 Tag generation functions
16968f6cb9eSMirco Schönfeld  */
/*
 * Emits a tag of the given kind named after the token's text, located at
 * the line number and file position stored in the token.  Nothing is
 * emitted when the kind is disabled in BibKinds.
 */
static void makeBibTag (tokenInfo *const token, bibKind kind)
{
	tagEntryInfo entry;

	if (!BibKinds [kind].enabled)
		return;

	initTagEntry (&entry, vStringValue (token->string), kind);

	/* place the tag where the token says, not at the current read position */
	entry.lineNumber   = token->lineNumber;
	entry.filePosition = token->filePosition;

	makeTagEntry (&entry);
}
18468f6cb9eSMirco Schönfeld 
18568f6cb9eSMirco Schönfeld /*
18668f6cb9eSMirco Schönfeld  *	 Parsing functions
18768f6cb9eSMirco Schönfeld  */
18868f6cb9eSMirco Schönfeld 
18968f6cb9eSMirco Schönfeld /*
19068f6cb9eSMirco Schönfeld  *	Read a C identifier beginning with "firstChar" and places it into
19168f6cb9eSMirco Schönfeld  *	"name".
19268f6cb9eSMirco Schönfeld  */
parseIdentifier(vString * const string,const int firstChar)19368f6cb9eSMirco Schönfeld static void parseIdentifier (vString *const string, const int firstChar)
19468f6cb9eSMirco Schönfeld {
19568f6cb9eSMirco Schönfeld 	int c = firstChar;
19668f6cb9eSMirco Schönfeld 	Assert (isIdentChar (c));
19768f6cb9eSMirco Schönfeld 	do
19868f6cb9eSMirco Schönfeld 	{
19968f6cb9eSMirco Schönfeld 		vStringPut (string, c);
20068f6cb9eSMirco Schönfeld 		c = getcFromInputFile ();
20168f6cb9eSMirco Schönfeld 	} while (c != EOF && isIdentChar (c));
20268f6cb9eSMirco Schönfeld 	if (c != EOF)
20368f6cb9eSMirco Schönfeld 		ungetcToInputFile (c);		/* unget non-identifier character */
20468f6cb9eSMirco Schönfeld }
20568f6cb9eSMirco Schönfeld 
/*
 * Reads the next token from the input into "token".
 *
 * Blanks, tabs and newlines are skipped, and so is everything from a '%'
 * up to the next newline.  The token then becomes one of:
 *   - TOKEN_KEYWORD:    '@' followed by a name found in the keyword table;
 *                       the text is "@name" and token->keyword is its ID.
 *   - TOKEN_IDENTIFIER: '@' followed by any other name (text "@name"), or a
 *                       run of identifier characters (text is that run).
 *   - otherwise:        a single character whose byte value is the type;
 *                       the text stays empty.
 *
 * Returns false at end of input, true otherwise.
 */
static bool readToken (tokenInfo *const token)
{
	int ch;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	for (;;)
	{
		do
			ch = getcFromInputFile ();
		while (ch == ' ' || ch == '\t' || ch == '\n');

		token->lineNumber   = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();
		token->type         = (unsigned char) ch;

		if (ch != '%')
			break;

		/* '%' starts a comment that runs to the end of the line */
		skipToCharacterInInputFile ('\n');
	}

	if (ch == EOF)
		return false;

	if (ch == '@')
	{
		/*
		 * Entries start with '@' directly followed by a letter.  Anything
		 * else leaves a plain '@' token and the character is pushed back.
		 */
		const int next = getcFromInputFile ();

		if (isalpha (next))
		{
			vStringPut (token->string, '@');
			parseIdentifier (token->string, next);
			/* look the name up without its leading '@' */
			token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
			token->type = isKeyword (token, KEYWORD_NONE)
				? TOKEN_IDENTIFIER
				: TOKEN_KEYWORD;
		}
		else
			ungetcToInputFile (next);
	}
	else if (isIdentChar (ch))
	{
		parseIdentifier (token->string, ch);
		token->type = TOKEN_IDENTIFIER;
	}

	return true;
}
26468f6cb9eSMirco Schönfeld 
/*
 * Copies type, keyword, location and text from "src" into "dest".  The text
 * is copied into dest's own string buffer, so the two tokens stay independent.
 */
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
	dest->type         = src->type;
	dest->keyword      = src->keyword;
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
	vStringCopy (dest->string, src->string);
}
27368f6cb9eSMirco Schönfeld 
27468f6cb9eSMirco Schönfeld /*
27568f6cb9eSMirco Schönfeld  *	 Scanning functions
27668f6cb9eSMirco Schönfeld  */
27768f6cb9eSMirco Schönfeld 
/*
 * Handles one entry whose keyword has just been read into "token":
 *
 *   @article{identifier,
 *   author="John Doe"}
 *
 * Expects an opening curly brace and then the entry identifier, and emits a
 * tag of kind "kind" named after that identifier.  The tag carries the
 * location of the keyword token, not of the identifier.
 *
 * Returns true when the caller should stop scanning: either the input ended,
 * or the token after the opening brace was not an identifier.  Returns false
 * otherwise, including when no opening brace follows the keyword.
 */
static bool parseTag (tokenInfo *const token, bibKind kind)
{
	tokenInfo *const tagToken = newToken ();
	vString *const entryId = vStringNew ();
	bool stop = false;

	if (isType (token, TOKEN_KEYWORD))
	{
		/* remember where the entry starts before moving on */
		copyToken (tagToken, token);
		stop = !readToken (token);
	}

	if (!stop && isType (token, TOKEN_OPEN_CURLY))
	{
		if (readToken (token) && isType (token, TOKEN_IDENTIFIER))
		{
			vStringCat (entryId, token->string);
			vStringStripTrailing (entryId);
			if (vStringLength (entryId) > 0)
			{
				vStringCopy (tagToken->string, entryId);
				makeBibTag (tagToken, kind);
			}
		}
		else
		{
			/* end of input, or the entry does not begin with an identifier */
			stop = true;
		}
	}

	deleteToken (tagToken);
	vStringDelete (entryId);
	return stop;
}
33268f6cb9eSMirco Schönfeld 
/*
 * Scans the whole input: reads tokens until readToken () reports the end of
 * input and, for every entry keyword, lets parseTag () emit the tag of the
 * matching kind.  Scanning also ends as soon as parseTag () returns true.
 * "token" is caller-owned scratch space reused for every token.
 */
static void parseBibFile (tokenInfo *const token)
{
	/* tag kind for each keyword ID, indexed by the keyword ID */
	static const bibKind kindForKeyword [] = {
		[KEYWORD_article]       = BIBTAG_ARTICLE,
		[KEYWORD_book]          = BIBTAG_BOOK,
		[KEYWORD_booklet]       = BIBTAG_BOOKLET,
		[KEYWORD_conference]    = BIBTAG_CONFERENCE,
		[KEYWORD_inbook]        = BIBTAG_INBOOK,
		[KEYWORD_incollection]  = BIBTAG_INCOLLECTION,
		[KEYWORD_inproceedings] = BIBTAG_INPROCEEDINGS,
		[KEYWORD_manual]        = BIBTAG_MANUAL,
		[KEYWORD_mastersthesis] = BIBTAG_MASTERSTHESIS,
		[KEYWORD_misc]          = BIBTAG_MISC,
		[KEYWORD_phdthesis]     = BIBTAG_PHDTHESIS,
		[KEYWORD_proceedings]   = BIBTAG_PROCEEDINGS,
		[KEYWORD_string]        = BIBTAG_STRING,
		[KEYWORD_techreport]    = BIBTAG_TECHREPORT,
		[KEYWORD_unpublished]   = BIBTAG_UNPUBLISHED
	};

	while (readToken (token))
	{
		keywordId id;

		if (!isType (token, TOKEN_KEYWORD))
			continue;

		id = token->keyword;
		if (id < 0 || (size_t) id >= ARRAY_SIZE (kindForKeyword))
			continue;	/* not one of the entry keywords handled here */

		if (parseTag (token, kindForKeyword [id]))
			break;
	}
}
39968f6cb9eSMirco Schönfeld 
initialize(const langType language)40068f6cb9eSMirco Schönfeld static void initialize (const langType language)
40168f6cb9eSMirco Schönfeld {
40268f6cb9eSMirco Schönfeld 	Lang_bib = language;
40368f6cb9eSMirco Schönfeld }
40468f6cb9eSMirco Schönfeld 
findBibTags(void)40568f6cb9eSMirco Schönfeld static void findBibTags (void)
40668f6cb9eSMirco Schönfeld {
40768f6cb9eSMirco Schönfeld 	tokenInfo *const token = newToken ();
40868f6cb9eSMirco Schönfeld 
40968f6cb9eSMirco Schönfeld 	parseBibFile (token);
41068f6cb9eSMirco Schönfeld 
41168f6cb9eSMirco Schönfeld 	deleteToken (token);
41268f6cb9eSMirco Schönfeld }
41368f6cb9eSMirco Schönfeld 
41468f6cb9eSMirco Schönfeld /* Create parser definition structure */
BibtexParser(void)41568f6cb9eSMirco Schönfeld extern parserDefinition* BibtexParser (void)
41668f6cb9eSMirco Schönfeld {
41768f6cb9eSMirco Schönfeld 	Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
41868f6cb9eSMirco Schönfeld 	static const char *const extensions [] = { "bib", NULL };
419bfb622f8SMirco Schoenfeld 	parserDefinition *const def = parserNew ("BibTeX");
42068f6cb9eSMirco Schönfeld 	def->extensions = extensions;
42168f6cb9eSMirco Schönfeld 	/*
42268f6cb9eSMirco Schönfeld 	 * New definitions for parsing instead of regex
42368f6cb9eSMirco Schönfeld 	 */
42468f6cb9eSMirco Schönfeld 	def->kindTable		= BibKinds;
42568f6cb9eSMirco Schönfeld 	def->kindCount		= ARRAY_SIZE (BibKinds);
42668f6cb9eSMirco Schönfeld 	def->parser				= findBibTags;
42768f6cb9eSMirco Schönfeld 	def->initialize		= initialize;
42868f6cb9eSMirco Schönfeld 	def->keywordTable	= BibKeywordTable;
42968f6cb9eSMirco Schönfeld 	def->keywordCount	= ARRAY_SIZE (BibKeywordTable);
43068f6cb9eSMirco Schönfeld 	return def;
43168f6cb9eSMirco Schönfeld }
432