1 /*
2 *
3 * Copyright (c) 2007-2011, Nick Treleaven
4 *
5 * This source code is released for free distribution under the terms of the
6 * GNU General Public License version 2 or (at your option) any later version.
7 *
8 * This module contains functions for generating tags for reStructuredText (reST) files.
9 *
10 * This module was ported from geany.
11 *
12 * References:
13 * https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html
14 */
15
16 /*
17 * INCLUDE FILES
18 */
19 #include "general.h" /* must always come first */
20
21 #include <ctype.h>
22 #include <string.h>
23
24 #include "parse.h"
25 #include "read.h"
26 #include "vstring.h"
27 #include "nestlevel.h"
28 #include "entry.h"
29 #include "routines.h"
30 #include "field.h"
31 #include "htable.h"
32 #include "debug.h"
33
34 /*
35 * DATA DEFINITIONS
36 */
/*
 * Kind indices of the tags emitted by this parser.
 *
 * The values 0 .. SECTION_COUNT-1 are the section levels, listed from the
 * outermost (title) to the innermost (subsubsection).  The kinds for explicit
 * markup start at SECTION_COUNT.  K_EOF is not a real kind; it is passed to
 * getNestingLevel() at the end of input.
 */
typedef enum {
	K_EOF = -1,

	/* section levels */
	K_TITLE = 0,
	K_SUBTITLE,
	K_CHAPTER,
	K_SECTION,
	K_SUBSECTION,
	K_SUBSUBSECTION,
	SECTION_COUNT,				/* number of section levels */

	/* explicit markup */
	K_CITATION = SECTION_COUNT,
	K_TARGET,
	K_SUBSTDEF,
} rstKind;
50
51 static kindDefinition RstKinds[] = {
52 { true, 'H', "title", "titles"},
53 { true, 'h', "subtitle", "sub titles" },
54 { true, 'c', "chapter", "chapters"},
55 { true, 's', "section", "sections" },
56 { true, 'S', "subsection", "subsections" },
57 { true, 't', "subsubsection", "subsubsections" },
58 { true, 'C', "citation", "citations"},
59 { true, 'T', "target", "targets" },
60 { true, 'd', "substdef", "substitute definitions" },
61 };
62
/* Indices into RstFields[] for the parser-specific fields. */
typedef enum {
	F_SECTION_MARKER = 0,	/* the adornment character of a section */
	F_SECTION_OVERLINE,		/* set when the section also has an overline */
} rstField;
67
68 static fieldDefinition RstFields [] = {
69 {
70 .name = "sectionMarker",
71 .description = "character used for declaring section",
72 .enabled = false,
73 },
74 {
75 .name = "overline",
76 .description = "whether using overline & underline for declaring section",
77 .enabled = false,
78 .dataType = FIELDTYPE_BOOL
79 },
80 };
81
82 static NestingLevels *nestingLevels = NULL;
83
/*
 * One slot per section level: the adornment style assigned to that level
 * and how often it has been seen.
 */
struct sectionTracker
{
	char kindchar;	/* adornment character of the level */
	bool overline;	/* whether the style includes an overline */
	int count;		/* number of sections seen; 0 marks an unused slot */
};
89
/* The most recently seen overline candidate. */
struct olineTracker {
	char c;			/* adornment character; 0 while no overline is pending */
	size_t len;		/* length of the overline in bytes */
};
95
96 /*
97 * FUNCTION DEFINITIONS
98 */
99
getNestingLevel(const int kind)100 static NestingLevel *getNestingLevel(const int kind)
101 {
102 NestingLevel *nl;
103 tagEntryInfo *e;
104
105 int d = 0;
106
107 if (kind > K_EOF)
108 {
109 d++;
110 /* 1. we want the line before the '---' underline chars */
111 d++;
112 /* 2. we want the line before the next section/chapter title. */
113 }
114
115 while (1)
116 {
117 nl = nestingLevelsGetCurrent(nestingLevels);
118 e = getEntryOfNestingLevel (nl);
119 if ((nl && (e == NULL)) || (e && e->kindIndex >= kind))
120 {
121 if (e)
122 e->extensionFields.endLine = (getInputLineNumber() - d);
123 nestingLevelsPop(nestingLevels);
124 }
125 else
126 break;
127 }
128 return nl;
129 }
130
/*
 * Makes a tag called `name` of kind `kindex`.  If a nesting level is
 * currently open, the tag is scoped to it.
 *
 * Returns the value of makeTagEntry(); callers compare it with CORK_NIL.
 */
static int makeTargetRstTag(const vString* const name, rstKind kindex)
{
	tagEntryInfo tag;
	const NestingLevel *enclosing;

	initTagEntry (&tag, vStringValue (name), kindex);

	enclosing = nestingLevelsGetCurrent(nestingLevels);
	if (enclosing != NULL)
		tag.extensionFields.scopeIndex = enclosing->corkIndex;

	return makeTagEntry (&tag);
}
143
makeSectionRstTag(const vString * const name,const int kind,const MIOPos filepos,char marker,bool overline)144 static void makeSectionRstTag(const vString* const name, const int kind, const MIOPos filepos,
145 char marker, bool overline)
146 {
147 const NestingLevel *const nl = getNestingLevel(kind);
148 tagEntryInfo *parent;
149
150 int r = CORK_NIL;
151
152 if (vStringLength (name) > 0)
153 {
154 tagEntryInfo e;
155 char m [2] = { [1] = '\0' };
156
157 initTagEntry (&e, vStringValue (name), kind);
158
159 e.lineNumber--; /* we want the line before the '---' underline chars */
160 e.filePosition = filepos;
161
162 parent = getEntryOfNestingLevel (nl);
163 if (parent && (parent->kindIndex < kind))
164 e.extensionFields.scopeIndex = nl->corkIndex;
165
166 m[0] = marker;
167 attachParserField (&e, false, RstFields [F_SECTION_MARKER].ftype, m);
168
169 if (overline)
170 attachParserField (&e, false, RstFields [F_SECTION_OVERLINE].ftype, "");
171
172 r = makeTagEntry (&e);
173 }
174 nestingLevelsPush(nestingLevels, r);
175 }
176
177
/*
 * Returns true if every character of str equals its first character.
 * The empty string and one-character strings count as "all the same".
 */
static bool issame(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
	{
		if (*p != *str)
			return false;
	}
	return true;
}
194
195
get_kind(char c,bool overline,struct sectionTracker tracker[])196 static int get_kind(char c, bool overline, struct sectionTracker tracker[])
197 {
198 int i;
199
200 for (i = 0; i < SECTION_COUNT; i++)
201 {
202 if (tracker[i].kindchar == c && tracker[i].overline == overline)
203 {
204 tracker[i].count++;
205 return i;
206 }
207
208 if (tracker[i].count == 0)
209 {
210 tracker[i].count = 1;
211 tracker[i].kindchar = c;
212 tracker[i].overline = overline;
213 return i;
214 }
215 }
216 return -1;
217 }
218
219
/*
 * Computes the length, in characters, of the UTF-8 string made of the first
 * buf_len bytes of buf.
 *
 * Validation is quick and naive: only the leading byte of each sequence is
 * inspected (continuation bytes are not checked).
 *
 * Returns the number of characters, or -1 if the data doesn't look like
 * UTF-8 (invalid leading byte, or a sequence cut short by the end of the
 * buffer).  A buf_len of zero or less yields 0.
 */
static int utf8_strlen(const char *buf, int buf_len)
{
	const unsigned char *p = (const unsigned char *) buf;
	int remaining = buf_len;
	int len = 0;

	while (remaining > 0)
	{
		int seq_len;

		if (! (*p & 0x80))
			seq_len = 1;
		else if ((*p & 0xe0) == 0xc0)
			seq_len = 2;
		else if ((*p & 0xf0) == 0xe0)
			seq_len = 3;
		else if ((*p & 0xf8) == 0xf0)
			seq_len = 4;
		else /* not a valid leading UTF-8 byte, abort */
			return -1;

		/* Check before advancing so that the pointer never moves beyond
		 * one past the end of the buffer. */
		if (seq_len > remaining) /* incomplete last sequence */
			return -1;

		p += seq_len;
		remaining -= seq_len;
		len++;
	}

	return len;
}
247
248
/*
 * Tests whether `line` starts with ".. " immediately followed by `reftype`.
 *
 * Returns a pointer to the character after `reftype`, or NULL if the line
 * does not start that way.  The characters are examined one at a time, so a
 * line shorter than the prefix is rejected at its terminator.
 */
static const unsigned char *is_markup_line (const unsigned char *line, char reftype)
{
	if (line [0] != '.')
		return NULL;
	if (line [1] != '.')
		return NULL;
	if (line [2] != ' ')
		return NULL;
	if (line [3] != reftype)
		return NULL;

	return line + 4;
}
256
/*
 * Extracts the name of an explicit markup construct and makes a tag for it.
 *
 *   target_line        text following the character that introduced the
 *                      markup (see is_markup_line())
 *   defaultTerminator  character that ends a name not quoted with backquotes
 *   kindex             kind of the tag to make
 *
 * A name starting with a backquote runs up to the next unescaped backquote.
 * Any other name starts at the first character, which must be neither
 * whitespace nor the end of the line, and runs up to the next unescaped
 * defaultTerminator.  A backslash escapes the following character; both the
 * backslash and the escaped character are kept in the name.
 *
 * Returns the value of makeTargetRstTag(), or CORK_NIL if no name was found.
 */
static int capture_markup (const unsigned char *target_line, char defaultTerminator, rstKind kindex)
{
	vString *name = vStringNew ();
	const unsigned char first = *target_line;
	unsigned char terminator;
	bool escaped = false;
	int r = CORK_NIL;

	if (first == '`')
		terminator = '`';
	else if (first != '\0' && !isspace (first))
	{
		/* "Simple reference names are single words consisting of
		 * alphanumerics plus isolated (no two adjacent) internal
		 * hyphens, underscores, periods, colons and plus signs; no
		 * whitespace or other characters are allowed."
		 * -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#reference-names
		 */
		vStringPut (name, first);
		terminator = defaultTerminator;
	}
	else
		goto out;

	for (const unsigned char *p = target_line + 1; *p != '\0'; p++)
	{
		const bool backslash = (*p == '\\');

		/* an unescaped terminator ends the name; a backslash is always
		 * treated as an escape, never as a terminator */
		if (!escaped && !backslash && *p == terminator)
			break;

		vStringPut (name, *p);
		escaped = (!escaped && backslash);
	}

	if (vStringLength (name) == 0)
		goto out;

	r = makeTargetRstTag (name, kindex);

out:
	vStringDelete (name);
	return r;
}
314
overline_clear(struct olineTracker * ol)315 static void overline_clear(struct olineTracker *ol)
316 {
317 ol->c = 0;
318 ol->len = 0;
319 }
320
overline_set(struct olineTracker * ol,char c,size_t len)321 static void overline_set(struct olineTracker *ol, char c, size_t len)
322 {
323 ol->c = c;
324 ol->len = len;
325 }
326
has_overline(struct olineTracker * ol)327 static bool has_overline(struct olineTracker *ol)
328 {
329 return (ol->c != 0);
330 }
331
/*
 * Walks `shift` steps up the scope chain of `e` and returns the cork index
 * of the entry reached.  Returns CORK_NIL if `shift` is not positive.
 *
 * The chain is expected to be at least `shift` entries long, which the
 * assertions check.  NOTE(review): Assert() presumably compiles to nothing
 * in non-debug builds, so the walk also stops explicitly when no entry is
 * found rather than dereferencing a NULL entry on the next step.  In that
 * case the last scope index read is returned, as before.
 */
static int getFosterEntry(tagEntryInfo *e, int shift)
{
	int r = CORK_NIL;

	while (shift-- > 0)
	{
		r = e->extensionFields.scopeIndex;
		Assert(r != CORK_NIL);
		e = getEntryInCorkQueue(r);
		Assert(e);
		if (e == NULL)
			break;	/* the chain is shorter than expected */
	}
	return r;
}
345
/*
 * Moves every section entry whose kind lies in [baseKind, SECTION_COUNT)
 * `shift` levels down.
 *
 * An entry that would end up at or beyond SECTION_COUNT is marked as a
 * placeholder, and the entries scoped to it are re-scoped to its "foster
 * parent": the entry found `shift` steps up its scope chain.
 */
static void shiftKinds(int shift, rstKind baseKind)
{
	const size_t count = countEntryInCorkQueue();

	/* maps the cork index of a placeholder to that of its foster parent;
	 * unknown keys yield CORK_NIL */
	hashTable *foster_of = hashTableNew (count,
										 hashPtrhash,
										 hashPtreq, NULL, NULL);
	hashTableSetValueForUnknownKey(foster_of, HT_INT_TO_PTR(CORK_NIL), NULL);

	/* Pass 1: shift the kinds and collect the entries pushed out of range. */
	for (int index = 0; index < count; index++)
	{
		tagEntryInfo *e = getEntryInCorkQueue(index);

		if (e == NULL)
			continue;
		if (e->kindIndex < baseKind || e->kindIndex >= SECTION_COUNT)
			continue;

		e->kindIndex += shift;
		if (e->kindIndex < SECTION_COUNT)
			continue;

		markTagPlaceholder(e, true);

		int foster_parent = getFosterEntry(e, shift);
		Assert (foster_parent != CORK_NIL);
		hashTablePutItem(foster_of, HT_INT_TO_PTR(index),
						 HT_INT_TO_PTR(foster_parent));
	}

	/* Pass 2: re-scope the entries that pointed at a placeholder. */
	for (int index = 0; index < count; index++)
	{
		tagEntryInfo *e = getEntryInCorkQueue(index);

		if (e == NULL || e->extensionFields.scopeIndex == CORK_NIL)
			continue;

		void *target = hashTableGetItem (foster_of,
										 HT_INT_TO_PTR(e->extensionFields.scopeIndex));
		if (HT_PTR_TO_INT(target) != CORK_NIL)
			e->extensionFields.scopeIndex = HT_PTR_TO_INT(target);
	}

	hashTableDelete(foster_of);
}
385
adjustSectionKinds(struct sectionTracker section_tracker[])386 static void adjustSectionKinds(struct sectionTracker section_tracker[])
387 {
388 if (section_tracker[K_TITLE].count > 1)
389 {
390 shiftKinds(2, K_TITLE);
391 return;
392 }
393
394 if (section_tracker[K_TITLE].count == 1
395 && section_tracker[K_SUBTITLE].count > 1)
396 {
397 shiftKinds(1, K_SUBTITLE);
398 return;
399 }
400 }
401
/*
 * Replaces the scope of `e`, given as the cork index `parent_index`, with
 * the parent's kind index and a copy of its name made with eStrdup(), then
 * resets e's scope index to CORK_NIL.  Does nothing if the cork queue has
 * no entry for `parent_index`.
 */
static void inlineTagScope(tagEntryInfo *e, int parent_index)
{
	const tagEntryInfo *const parent = getEntryInCorkQueue (parent_index);

	if (parent == NULL)
		return;

	e->extensionFields.scopeKindIndex = parent->kindIndex;
	e->extensionFields.scopeName = eStrdup(parent->name);
	e->extensionFields.scopeIndex = CORK_NIL;
}
412
inlineScopes(void)413 static void inlineScopes (void)
414 {
415 /* TODO
416 Following code makes the scope information full qualified form.
417 Do users want the full qualified form?
418 --- ./Units/rst.simple.d/expected.tags 2015-12-18 01:32:35.574255617 +0900
419 +++ /home/yamato/var/ctags-github/Units/rst.simple.d/FILTERED.tmp 2016-05-05 03:05:38.165604756 +0900
420 @@ -5,2 +5,2 @@
421 -Subsection 1.1.1 input.rst /^Subsection 1.1.1$/;" S section:Section 1.1
422 -Subsubsection 1.1.1.1 input.rst /^Subsubsection 1.1.1.1$/;" t subsection:Subsection 1.1.1
423 +Subsection 1.1.1 input.rst /^Subsection 1.1.1$/;" S section:Chapter 1.Section 1.1
424 +Subsubsection 1.1.1.1 input.rst /^Subsubsection 1.1.1.1$/;" t subsection:Chapter 1.Section 1.1.Subsection 1.1.1
425 */
426 size_t count = countEntryInCorkQueue();
427 for (int index = 0; index < count; index++)
428 {
429 tagEntryInfo *e = getEntryInCorkQueue(index);
430
431 if (e && e->extensionFields.scopeIndex != CORK_NIL)
432 inlineTagScope(e, e->extensionFields.scopeIndex);
433 }
434 }
435
findRstTags(void)436 static void findRstTags (void)
437 {
438 vString *name = vStringNew ();
439 MIOPos filepos;
440 const unsigned char *line;
441 const unsigned char *markup_line;
442 struct sectionTracker section_tracker[SECTION_COUNT];
443 struct olineTracker overline;
444
445 memset(&filepos, 0, sizeof(filepos));
446 memset(section_tracker, 0, sizeof section_tracker);
447 overline_clear(&overline);
448 nestingLevels = nestingLevelsNew(0);
449
450 while ((line = readLineFromInputFile ()) != NULL)
451 {
452 if ((markup_line = is_markup_line (line, '_')) != NULL)
453 {
454 overline_clear(&overline);
455 /* Handle .. _target:
456 * http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets
457 */
458 if (capture_markup (markup_line, ':', K_TARGET) != CORK_NIL)
459 {
460 vStringClear (name);
461 continue;
462 }
463 }
464 else if ((markup_line = is_markup_line (line, '[')) != NULL)
465 {
466 overline_clear(&overline);
467 /* Handle .. [citation]
468 * https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#citations
469 */
470 if (capture_markup (markup_line, ']', K_CITATION) != CORK_NIL)
471 {
472 vStringClear (name);
473 continue;
474 }
475 }
476 else if ((markup_line = is_markup_line (line, '|')) != NULL)
477 {
478 overline_clear(&overline);
479 /* Hanle .. |substitute definition|
480 * https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#substitution-definitions
481 */
482 if (capture_markup (markup_line, '|', K_SUBSTDEF) != CORK_NIL)
483 {
484 vStringClear (name);
485 continue;
486 }
487 }
488
489 int line_len = strlen((const char*) line);
490 int name_len_bytes = vStringLength(name);
491 /* FIXME: this isn't right, actually we need the real display width,
492 * taking into account double-width characters and stuff like that.
493 * But duh. */
494 int name_len = utf8_strlen(vStringValue(name), name_len_bytes);
495
496 /* if the name doesn't look like UTF-8, assume one-byte charset */
497 if (name_len < 0)
498 name_len = name_len_bytes;
499
500 /* overline may come after an empty line (or begging of file). */
501 if (name_len_bytes == 0 && line_len > 0 &&
502 ispunct(line[0]) && issame((const char*) line))
503 {
504 overline_set(&overline, *line, line_len);
505 continue;
506 }
507
508 /* underlines must be the same length or more */
509 if (line_len >= name_len && name_len > 0 &&
510 ispunct(line[0]) && issame((const char*) line))
511 {
512 char c = line[0];
513 bool o = (overline.c == c && overline.len == line_len);
514 int kind = get_kind(c, o, section_tracker);
515
516 overline_clear(&overline);
517
518 if (kind >= 0)
519 {
520 makeSectionRstTag(name, kind, filepos, c, o);
521 vStringClear(name);
522 continue;
523 }
524 }
525
526 if (has_overline(&overline))
527 {
528 if (name_len > 0)
529 {
530 /*
531 * Though we saw an overline and a section title text,
532 * we cannot find the associated underline.
533 * In that case, we must reset the state of tracking
534 * overline.
535 */
536 overline_clear(&overline);
537 }
538
539 /*
540 * We san an overline. The line is the candidate
541 * of a section title text. Skip the prefixed whitespaces.
542 */
543 while (isspace(*line))
544 line++;
545 }
546
547 vStringClear (name);
548 if (!isspace(*line))
549 {
550 vStringCatS(name, (const char*)line);
551 vStringStripTrailing (name);
552 filepos = getInputFilePosition();
553 }
554 }
555 /* Force popping all nesting levels */
556 getNestingLevel (K_EOF);
557 vStringDelete (name);
558 nestingLevelsFree(nestingLevels);
559
560 adjustSectionKinds(section_tracker);
561 inlineScopes();
562 }
563
RstParser(void)564 extern parserDefinition* RstParser (void)
565 {
566 static const char *const extensions [] = { "rest", "reST", "rst", NULL };
567 parserDefinition* const def = parserNew ("ReStructuredText");
568 static const char *const aliases[] = {
569 "rst", /* The name of emacs's mode */
570 NULL
571 };
572
573 def->kindTable = RstKinds;
574 def->kindCount = ARRAY_SIZE (RstKinds);
575 def->extensions = extensions;
576 def->aliases = aliases;
577 def->parser = findRstTags;
578
579 def->fieldTable = RstFields;
580 def->fieldCount = ARRAY_SIZE (RstFields);
581
582 def->useCork = CORK_QUEUE;
583
584 return def;
585 }
586