/*
 *   Copyright (c) 2008, David Fishburn
 *   Copyright (c) 2012, Jan Larres
 *   Copyright (c) 2019, Mirco Schönfeld
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License version 2 or (at your option) any later version.
 *
 *   This module contains functions for generating tags for the entries of
 *   BibTeX files.
 *
 *   BibTeX language "reference":
 *   https://en.wikipedia.org/wiki/BibTeX
 */
14
15 /*
16 * INCLUDE FILES
17 */
18 #include "general.h" /* must always come first */
19 #include <ctype.h> /* to define isalpha () */
20 #include <string.h>
21
22 #include "debug.h"
23 #include "entry.h"
24 #include "keyword.h"
25 #include "parse.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "vstring.h"
29
30 /*
31 * MACROS
32 */
/* True when TOKEN's type field equals T. */
#define isType(token,t)		((bool) ((token)->type == (t)))

/* True when TOKEN's keyword field equals K. */
#define isKeyword(token,k)	((bool) ((token)->keyword == (k)))

/*
 * True when C may be part of an identifier (an entry type after '@', or a
 * citation key).
 *
 * Besides alphanumerics and "_-+:", the characters '.' and '/' are accepted
 * so that widely used key styles such as "DBLP:conf/icse/Smith20" or
 * "smith.2020" are read as one identifier instead of being cut short at the
 * first '.' or '/'.
 *
 * C is evaluated several times: pass a plain variable, never an expression
 * with side effects.  C must be EOF or representable as unsigned char, as
 * required by <ctype.h>.
 */
#define isIdentChar(c) \
	(isalnum (c) || (c) == '_' || (c) == '-' || (c) == '+' || (c) == ':' \
	 || (c) == '.' || (c) == '/')
37
38 /*
39 * DATA DECLARATIONS
40 */
41
/*
 *   Identifiers for the BibTeX entry types recognised after an '@'.
 *   The enumerators are numbered consecutively starting from 0.
 */
enum eKeywordId {
	KEYWORD_article = 0,
	KEYWORD_book,
	KEYWORD_booklet,
	KEYWORD_conference,
	KEYWORD_inbook,
	KEYWORD_incollection,
	KEYWORD_inproceedings,
	KEYWORD_manual,
	KEYWORD_mastersthesis,
	KEYWORD_misc,
	KEYWORD_phdthesis,
	KEYWORD_proceedings,
	KEYWORD_string,
	KEYWORD_techreport,
	KEYWORD_unpublished
};

/* Plain int rather than the enum so that KEYWORD_NONE can be stored too. */
typedef int keywordId;
63
/*
 *   Token types.  Values 0..255 stand for a single input byte with that
 *   value; only the ones the parser tests for are given a name.  Values
 *   from 256 upwards are synthetic types.
 */
enum eTokenType {
	/* single-byte tokens */
	TOKEN_OPEN_CURLY = '{',

	/* synthetic tokens */
	TOKEN_UNDEFINED  = 256,
	TOKEN_KEYWORD,
	TOKEN_IDENTIFIER
};

/* Plain int so that any byte value can be stored as a token type. */
typedef int tokenType;
73
74 typedef struct sTokenInfo {
75 tokenType type;
76 keywordId keyword;
77 vString * string;
78 unsigned long lineNumber;
79 MIOPos filePosition;
80 } tokenInfo;
81
82 /*
83 * DATA DEFINITIONS
84 */
85
86 static langType Lang_bib;
87
/*
 *   Tag kinds, one per supported BibTeX entry type.  The values are used as
 *   indices into BibKinds, so both must list the kinds in the same order;
 *   BIBTAG_COUNT is the number of kinds.
 */
typedef enum {
	BIBTAG_ARTICLE = 0,
	BIBTAG_BOOK,
	BIBTAG_BOOKLET,
	BIBTAG_CONFERENCE,
	BIBTAG_INBOOK,
	BIBTAG_INCOLLECTION,
	BIBTAG_INPROCEEDINGS,
	BIBTAG_MANUAL,
	BIBTAG_MASTERSTHESIS,
	BIBTAG_MISC,
	BIBTAG_PHDTHESIS,
	BIBTAG_PROCEEDINGS,
	BIBTAG_STRING,
	BIBTAG_TECHREPORT,
	BIBTAG_UNPUBLISHED,
	BIBTAG_COUNT	/* not a kind: number of entries above */
} bibKind;
106
107 static kindDefinition BibKinds [] = {
108 { true, 'a', "article", "article" },
109 { true, 'b', "book", "book" },
110 { true, 'B', "booklet", "booklet" },
111 { true, 'c', "conference", "conference" },
112 { true, 'i', "inbook", "inbook" },
113 { true, 'I', "incollection", "incollection" },
114 { true, 'j', "inproceedings", "inproceedings" },
115 { true, 'm', "manual", "manual" },
116 { true, 'M', "mastersthesis", "mastersthesis" },
117 { true, 'n', "misc", "misc" },
118 { true, 'p', "phdthesis", "phdthesis" },
119 { true, 'P', "proceedings", "proceedings" },
120 { true, 's', "string", "string" },
121 { true, 't', "techreport", "techreport" },
122 { true, 'u', "unpublished", "unpublished" }
123 };
124
125 static const keywordTable BibKeywordTable [] = {
126 /* keyword keyword ID */
127 { "article", KEYWORD_article },
128 { "book", KEYWORD_book },
129 { "booklet", KEYWORD_booklet },
130 { "conference", KEYWORD_conference },
131 { "inbook", KEYWORD_inbook },
132 { "incollection", KEYWORD_incollection },
133 { "inproceedings",KEYWORD_inproceedings },
134 { "manual", KEYWORD_manual },
135 { "mastersthesis",KEYWORD_mastersthesis },
136 { "misc", KEYWORD_misc },
137 { "phdthesis", KEYWORD_phdthesis },
138 { "proceedings", KEYWORD_proceedings },
139 { "string", KEYWORD_string },
140 { "techreport", KEYWORD_techreport },
141 { "unpublished", KEYWORD_unpublished }
142 };
143
144 /*
145 * FUNCTION DEFINITIONS
146 */
147
newToken(void)148 static tokenInfo *newToken (void)
149 {
150 tokenInfo *const token = xMalloc (1, tokenInfo);
151
152 token->type = TOKEN_UNDEFINED;
153 token->keyword = KEYWORD_NONE;
154 token->string = vStringNew ();
155 token->lineNumber = getInputLineNumber ();
156 token->filePosition = getInputFilePosition ();
157
158 return token;
159 }
160
/*
 *   Releases a token obtained from newToken (): first the string it owns,
 *   then the token structure itself.  `token' must not be used afterwards.
 */
static void deleteToken (tokenInfo *const token)
{
	vString *const text = token->string;

	vStringDelete (text);
	eFree (token);
}
166
167 /*
168 * Tag generation functions
169 */
/*
 *   Emits a tag of the given kind, named after the token's string and
 *   carrying the token's line number and file position.  Nothing is emitted
 *   when the kind is disabled in BibKinds.
 */
static void makeBibTag (tokenInfo *const token, bibKind kind)
{
	tagEntryInfo entry;

	if (! BibKinds [kind].enabled)
		return;

	initTagEntry (&entry, vStringValue (token->string), kind);

	/* report the tag at the position recorded in the token */
	entry.lineNumber   = token->lineNumber;
	entry.filePosition = token->filePosition;

	makeTagEntry (&entry);
}
184
185 /*
186 * Parsing functions
187 */
188
189 /*
190 * Read a C identifier beginning with "firstChar" and places it into
191 * "name".
192 */
parseIdentifier(vString * const string,const int firstChar)193 static void parseIdentifier (vString *const string, const int firstChar)
194 {
195 int c = firstChar;
196 Assert (isIdentChar (c));
197 do
198 {
199 vStringPut (string, c);
200 c = getcFromInputFile ();
201 } while (c != EOF && isIdentChar (c));
202 if (c != EOF)
203 ungetcToInputFile (c); /* unget non-identifier character */
204 }
205
readToken(tokenInfo * const token)206 static bool readToken (tokenInfo *const token)
207 {
208 int c;
209
210 token->type = TOKEN_UNDEFINED;
211 token->keyword = KEYWORD_NONE;
212 vStringClear (token->string);
213
214 getNextChar:
215
216 do
217 {
218 c = getcFromInputFile ();
219 }
220 while (c == '\t' || c == ' ' || c == '\n');
221
222 token->lineNumber = getInputLineNumber ();
223 token->filePosition = getInputFilePosition ();
224
225 token->type = (unsigned char) c;
226 switch (c)
227 {
228 case EOF: return false;
229
230 case '@':
231 /*
232 * All Bib entries start with an at symbol.
233 * Check if the next character is an alpha character
234 * else it is not a potential tex tag.
235 */
236 c = getcFromInputFile ();
237 if (! isalpha (c))
238 ungetcToInputFile (c);
239 else
240 {
241 vStringPut (token->string, '@');
242 parseIdentifier (token->string, c);
243 token->keyword = lookupCaseKeyword (vStringValue (token->string) + 1, Lang_bib);
244 if (isKeyword (token, KEYWORD_NONE))
245 token->type = TOKEN_IDENTIFIER;
246 else
247 token->type = TOKEN_KEYWORD;
248 }
249 break;
250 case '%':
251 skipToCharacterInInputFile ('\n'); /* % are single line comments */
252 goto getNextChar;
253 break;
254 default:
255 if (isIdentChar (c))
256 {
257 parseIdentifier (token->string, c);
258 token->type = TOKEN_IDENTIFIER;
259 }
260 break;
261 }
262 return true;
263 }
264
/*
 *   Makes "dest" a copy of "src": type, keyword, position and the string
 *   contents.  "dest" keeps its own string object.
 */
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
	dest->type         = src->type;
	dest->keyword      = src->keyword;
	dest->lineNumber   = src->lineNumber;
	dest->filePosition = src->filePosition;
	vStringCopy (dest->string, src->string);
}
273
274 /*
275 * Scanning functions
276 */
277
parseTag(tokenInfo * const token,bibKind kind)278 static bool parseTag (tokenInfo *const token, bibKind kind)
279 {
280 tokenInfo * const name = newToken ();
281 vString * currentid;
282 bool eof = false;
283
284 currentid = vStringNew ();
285 /*
286 * Bib entries are of these formats:
287 * @article{identifier,
288 * author="John Doe"}
289 *
290 * When a keyword is found, loop through all words up to
291 * a comma brace for the tag name.
292 *
293 */
294 if (isType (token, TOKEN_KEYWORD))
295 {
296 copyToken (name, token);
297 if (!readToken (token))
298 {
299 eof = true;
300 goto out;
301 }
302 }
303
304 if (isType (token, TOKEN_OPEN_CURLY))
305 {
306 if (!readToken (token))
307 {
308 eof = true;
309 goto out;
310 }
311 if (isType (token, TOKEN_IDENTIFIER)){
312 vStringCat (currentid, token->string);
313 vStringStripTrailing (currentid);
314 if (vStringLength (currentid) > 0)
315 {
316 vStringCopy (name->string, currentid);
317 makeBibTag (name, kind);
318 }
319 }
320 else
321 { // should find an identifier for bib item at first place
322 eof = true;
323 goto out;
324 }
325 }
326
327 out:
328 deleteToken (name);
329 vStringDelete (currentid);
330 return eof;
331 }
332
parseBibFile(tokenInfo * const token)333 static void parseBibFile (tokenInfo *const token)
334 {
335 bool eof = false;
336
337 do
338 {
339 if (!readToken (token))
340 break;
341
342 if (isType (token, TOKEN_KEYWORD))
343 {
344 switch (token->keyword)
345 {
346 case KEYWORD_article:
347 eof = parseTag (token, BIBTAG_ARTICLE);
348 break;
349 case KEYWORD_book:
350 eof = parseTag (token, BIBTAG_BOOK);
351 break;
352 case KEYWORD_booklet:
353 eof = parseTag (token, BIBTAG_BOOKLET);
354 break;
355 case KEYWORD_conference:
356 eof = parseTag (token, BIBTAG_CONFERENCE);
357 break;
358 case KEYWORD_inbook:
359 eof = parseTag (token, BIBTAG_INBOOK);
360 break;
361 case KEYWORD_incollection:
362 eof = parseTag (token, BIBTAG_INCOLLECTION);
363 break;
364 case KEYWORD_inproceedings:
365 eof = parseTag (token, BIBTAG_INPROCEEDINGS);
366 break;
367 case KEYWORD_manual:
368 eof = parseTag (token, BIBTAG_MANUAL);
369 break;
370 case KEYWORD_mastersthesis:
371 eof = parseTag (token, BIBTAG_MASTERSTHESIS);
372 break;
373 case KEYWORD_misc:
374 eof = parseTag (token, BIBTAG_MISC);
375 break;
376 case KEYWORD_phdthesis:
377 eof = parseTag (token, BIBTAG_PHDTHESIS);
378 break;
379 case KEYWORD_proceedings:
380 eof = parseTag (token, BIBTAG_PROCEEDINGS);
381 break;
382 case KEYWORD_string:
383 eof = parseTag (token, BIBTAG_STRING);
384 break;
385 case KEYWORD_techreport:
386 eof = parseTag (token, BIBTAG_TECHREPORT);
387 break;
388 case KEYWORD_unpublished:
389 eof = parseTag (token, BIBTAG_UNPUBLISHED);
390 break;
391 default:
392 break;
393 }
394 }
395 if (eof)
396 break;
397 } while (true);
398 }
399
initialize(const langType language)400 static void initialize (const langType language)
401 {
402 Lang_bib = language;
403 }
404
findBibTags(void)405 static void findBibTags (void)
406 {
407 tokenInfo *const token = newToken ();
408
409 parseBibFile (token);
410
411 deleteToken (token);
412 }
413
414 /* Create parser definition structure */
BibtexParser(void)415 extern parserDefinition* BibtexParser (void)
416 {
417 Assert (ARRAY_SIZE (BibKinds) == BIBTAG_COUNT);
418 static const char *const extensions [] = { "bib", NULL };
419 parserDefinition *const def = parserNew ("BibTeX");
420 def->extensions = extensions;
421 /*
422 * New definitions for parsing instead of regex
423 */
424 def->kindTable = BibKinds;
425 def->kindCount = ARRAY_SIZE (BibKinds);
426 def->parser = findBibTags;
427 def->initialize = initialize;
428 def->keywordTable = BibKeywordTable;
429 def->keywordCount = ARRAY_SIZE (BibKeywordTable);
430 return def;
431 }
432