/*
*
*   Copyright (c) 2007-2011, Nick Treleaven
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License version 2 or (at your option) any later version.
*
*   This module contains functions for generating tags for reStructuredText (reST) files.
*
*   This module was ported from geany.
*
*   References:
*      https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html
*/

/*
*   INCLUDE FILES
*/
#include "general.h"	/* must always come first */

#include <ctype.h>
#include <string.h>

#include "parse.h"
#include "read.h"
#include "vstring.h"
#include "nestlevel.h"
#include "entry.h"
#include "routines.h"
#include "field.h"
#include "htable.h"
#include "debug.h"

/*
*   DATA DEFINITIONS
*/
/* Kinds of tags produced by this parser.
 *
 * The enumerators from K_TITLE up to (but not including) SECTION_COUNT are
 * the section levels.  Their values double as indexes into the
 * sectionTracker array used by get_kind(), and they are listed in the same
 * order as the entries of RstKinds. */
typedef enum {
	K_EOF = -1,	/* not a real kind: passed to getNestingLevel() to pop every level */
	K_TITLE = 0,
	K_SUBTITLE,
	K_CHAPTER,
	K_SECTION,
	K_SUBSECTION,
	K_SUBSUBSECTION,
	SECTION_COUNT,	/* number of section-level kinds listed above */
	K_CITATION = SECTION_COUNT,	/* first non-section kind; shares the value of SECTION_COUNT */
	K_TARGET,
	K_SUBSTDEF,
} rstKind;

/* Kind table registered by RstParser().  Entries appear in the same order as
 * the rstKind enumerators, starting with K_TITLE.
 * NOTE(review): the initializer columns are presumably
 * { enabled, letter, name, description } -- confirm against the
 * kindDefinition declaration. */
static kindDefinition RstKinds[] = {
	{ true, 'H', "title",         "titles"},
	{ true, 'h', "subtitle",      "sub titles" },
	{ true, 'c', "chapter",       "chapters"},
	{ true, 's', "section",       "sections" },
	{ true, 'S', "subsection",    "subsections" },
	{ true, 't', "subsubsection", "subsubsections" },
	{ true, 'C', "citation",      "citations"},
	{ true, 'T', "target",        "targets" },
	{ true, 'd', "substdef",      "substitute definitions" },
};

typedef enum {
	F_SECTION_MARKER,
	F_SECTION_OVERLINE,
} rstField;

/* Parser-specific fields attached to section tags by makeSectionRstTag().
 * Both are disabled by default.  The entries are indexed by rstField. */
static fieldDefinition RstFields [] = {
	{
		/* value: one-character string holding the adornment character */
		.name = "sectionMarker",
		.description = "character used for declaring section",
		.enabled = false,
	},
	{
		/* attached (with an empty value) only when the section has an overline */
		.name = "overline",
		.description = "whether using overline & underline for declaring section",
		.enabled = false,
		.dataType = FIELDTYPE_BOOL
	},
};

/* Stack of the sections that are currently open.  Created at the start of
 * findRstTags() and freed before it returns. */
static NestingLevels *nestingLevels = NULL;

/* One slot per section level, filled in by get_kind() in order of first
 * appearance of each adornment style. */
struct sectionTracker {
	char kindchar;	/* adornment character of this level */
	bool overline;	/* whether this level's style includes an overline */
	int count;	/* number of sections seen with this style; 0 means the slot is unused */
};

/* Remembers the most recently seen overline candidate, if any. */
struct olineTracker
{
	char c;		/* overline character; 0 when no overline is pending */
	size_t len;	/* length of the overline line, as passed to overline_set() */
};

/*
*   FUNCTION DEFINITIONS
*/

/*
 * Closes every open section that cannot be the parent of a section of
 * kind KIND and returns the nesting level that remains on top of the
 * stack (NULL when the stack becomes empty).
 *
 * A level is popped when it has no tag entry, or when the kind index of
 * its entry is greater than or equal to KIND.  Popped entries get their
 * end line recorded.  Passing K_EOF pops everything.
 */
static NestingLevel *getNestingLevel(const int kind)
{
	/*
	 * How far the end line lies behind the current input line.  When a
	 * new section is being opened (KIND is a real kind) we step back over
	 *   1. the '---' underline chars, and
	 *   2. the title line of the next section/chapter.
	 * At end of input (K_EOF) no adjustment is made.
	 */
	const int back = (kind > K_EOF) ? 2 : 0;
	NestingLevel *level;

	for (;;)
	{
		level = nestingLevelsGetCurrent(nestingLevels);
		tagEntryInfo *entry = getEntryOfNestingLevel (level);

		const bool entryless = (level != NULL && entry == NULL);
		const bool not_shallower = (entry != NULL && entry->kindIndex >= kind);

		if (!entryless && !not_shallower)
			break;

		if (entry != NULL)
			entry->extensionFields.endLine = (getInputLineNumber() - back);
		nestingLevelsPop(nestingLevels);
	}
	return level;
}

/*
 * Emits a tag of kind KINDEX named NAME.  If a section is currently open,
 * the tag is scoped to it.  Returns the value of makeTagEntry().
 */
static int makeTargetRstTag(const vString* const name, rstKind kindex)
{
	tagEntryInfo tag;
	const NestingLevel *enclosing;

	initTagEntry (&tag, vStringValue (name), kindex);

	enclosing = nestingLevelsGetCurrent(nestingLevels);
	if (enclosing != NULL)
		tag.extensionFields.scopeIndex = enclosing->corkIndex;

	return makeTagEntry (&tag);
}

/*
 * Opens a new section of kind KIND.
 *
 * Sections that cannot enclose the new one are closed first.  When NAME is
 * not empty a tag is emitted for it, located at FILEPOS and scoped to the
 * enclosing section if that section's kind index is lower than KIND.
 * MARKER is recorded in the "sectionMarker" field and, when OVERLINE is
 * true, the "overline" field is attached too.  A nesting level is always
 * pushed, carrying CORK_NIL when no tag was emitted.
 */
static void makeSectionRstTag(const vString* const name, const int kind, const MIOPos filepos,
		       char marker, bool overline)
{
	const NestingLevel *const outer = getNestingLevel(kind);
	int cork_index = CORK_NIL;

	if (vStringLength (name) > 0)
	{
		tagEntryInfo tag;
		tagEntryInfo *outer_tag;
		char marker_str [2] = { marker, '\0' };

		initTagEntry (&tag, vStringValue (name), kind);

		/* the title sits on the line before the '---' underline chars */
		tag.lineNumber--;
		tag.filePosition = filepos;

		outer_tag = getEntryOfNestingLevel (outer);
		if (outer_tag != NULL && outer_tag->kindIndex < kind)
			tag.extensionFields.scopeIndex = outer->corkIndex;

		attachParserField (&tag, false, RstFields [F_SECTION_MARKER].ftype, marker_str);
		if (overline)
			attachParserField (&tag, false, RstFields [F_SECTION_OVERLINE].ftype, "");

		cork_index = makeTagEntry (&tag);
	}

	nestingLevelsPush(nestingLevels, cork_index);
}


/* Returns true when every character of STR equals its first character.
 * An empty string and a one-character string both count as uniform. */
static bool issame(const char *str)
{
	const char lead = str[0];
	const char *p;

	for (p = str; *p != '\0'; p++)
	{
		if (*p != lead)
			return false;
	}
	return true;
}


static int get_kind(char c, bool overline, struct sectionTracker tracker[])
{
	int i;

	for (i = 0; i < SECTION_COUNT; i++)
	{
		if (tracker[i].kindchar == c && tracker[i].overline == overline)
		{
			tracker[i].count++;
			return i;
		}

		if (tracker[i].count == 0)
		{
			tracker[i].count = 1;
			tracker[i].kindchar = c;
			tracker[i].overline = overline;
			return i;
		}
	}
	return -1;
}


/* Computes the number of code points in a UTF-8 string.
 *
 * buf      bytes to scan; need not be NUL-terminated
 * buf_len  number of bytes in buf; zero or negative yields 0
 *
 * Returns the code point count, or -1 if the bytes don't look like UTF-8:
 * an invalid lead byte, a sequence cut short by the end of the buffer, or a
 * lead byte that is not followed by the expected continuation bytes.
 * Overlong forms and surrogates are not rejected. */
static int utf8_strlen(const char *buf, int buf_len)
{
	const unsigned char *p = (const unsigned char *) buf;
	int remaining = buf_len;
	int len = 0;

	while (remaining > 0)
	{
		int seq_len;

		/* the lead byte tells how long the sequence is */
		if (! (*p & 0x80))
			seq_len = 1;
		else if ((*p & 0xe0) == 0xc0)
			seq_len = 2;
		else if ((*p & 0xf0) == 0xe0)
			seq_len = 3;
		else if ((*p & 0xf8) == 0xf0)
			seq_len = 4;
		else /* not a valid leading UTF-8 byte, abort */
			return -1;

		/* check the length before advancing, so that the pointer never
		 * moves beyond the end of the buffer */
		if (seq_len > remaining) /* incomplete last sequence */
			return -1;

		/* every byte after the lead byte must look like 10xxxxxx;
		 * otherwise this is most likely a one-byte charset */
		for (int i = 1; i < seq_len; i++)
		{
			if ((p[i] & 0xc0) != 0x80)
				return -1;
		}

		p += seq_len;
		remaining -= seq_len;
		len ++;
	}

	return len;
}


/*
 * Tests whether LINE starts with the explicit markup prefix ".. " followed
 * by the character REFTYPE.  Returns a pointer to the character right after
 * REFTYPE on a match, NULL otherwise.  Scanning stops at the first
 * mismatch, so a short line is never read past its terminating NUL.
 */
static const unsigned char *is_markup_line (const unsigned char *line, char reftype)
{
	static const unsigned char lead [] = { '.', '.', ' ' };

	for (size_t i = 0; i < sizeof lead; i++)
	{
		if (line [i] != lead [i])
			return NULL;
	}

	if (line [sizeof lead] != reftype)
		return NULL;

	return line + sizeof lead + 1;
}

/*
 * Extracts a reference name starting at TARGET_LINE and emits a tag of
 * kind KINDEX for it.
 *
 * A name opening with a backquote runs up to the next unescaped backquote;
 * any other name runs up to the next unescaped DEFAULTTERMINATOR.  A
 * backslash escapes the character after it; both are kept in the name.
 * If no terminator is found, the rest of the line is taken.
 *
 * Returns the value of makeTargetRstTag(), or CORK_NIL when the text
 * starts with whitespace, is empty, or yields an empty name.
 */
static int capture_markup (const unsigned char *target_line, char defaultTerminator, rstKind kindex)
{
	vString *name = vStringNew ();
	int cork_index = CORK_NIL;
	const unsigned char first = *target_line;
	unsigned char terminator;
	bool escaped = false;

	if (first == '`')
		terminator = '`';
	else if (first == '\0' || isspace (first))
		goto done;
	else
	{
		/* "Simple reference names are single words consisting of
		 * alphanumerics plus isolated (no two adjacent) internal
		 * hyphens, underscores, periods, colons and plus signs; no
		 * whitespace or other characters are allowed."
		 * -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#reference-names
		 */
		vStringPut (name, first);
		terminator = defaultTerminator;
	}

	for (const unsigned char *p = target_line + 1; *p != '\0'; p++)
	{
		const bool backslash = (*p == '\\');

		/* a backslash is never taken as the terminator */
		if (!escaped && !backslash && *p == terminator)
			break;

		vStringPut (name, *p);
		escaped = (!escaped && backslash);
	}

	if (vStringLength (name) > 0)
		cork_index = makeTargetRstTag (name, kindex);

 done:
	vStringDelete (name);
	return cork_index;
}

static void overline_clear(struct olineTracker *ol)
{
	ol->c = 0;
	ol->len = 0;
}

static void overline_set(struct olineTracker *ol, char c, size_t len)
{
	ol->c = c;
	ol->len = len;
}

/* Tells whether an overline is pending, i.e. the recorded character is not 0. */
static bool has_overline(struct olineTracker *ol)
{
	if (ol->c == 0)
		return false;
	return true;
}

/*
 * Walks SHIFT steps up the scope chain starting at E and returns the cork
 * index of the entry reached.  Returns CORK_NIL when SHIFT is not positive.
 * Every step is expected to find a parent (checked with Assert).
 */
static int getFosterEntry(tagEntryInfo *e, int shift)
{
	int ancestor = CORK_NIL;

	for (; shift > 0; shift--)
	{
		ancestor = e->extensionFields.scopeIndex;
		Assert(ancestor != CORK_NIL);

		e = getEntryInCorkQueue(ancestor);
		Assert(e);
	}
	return ancestor;
}

/*
 * Moves every section tag whose kind index is at least BASEKIND down by
 * SHIFT levels.
 *
 * A tag pushed to SECTION_COUNT or beyond has no section kind left to use:
 * it is marked as a placeholder, and tags scoped to it are re-scoped to
 * the ancestor found SHIFT levels above it.
 */
static void shiftKinds(int shift, rstKind baseKind)
{
	const size_t n_entries = countEntryInCorkQueue();
	hashTable *foster_of = hashTableNew (n_entries,
										 hashPtrhash,
										 hashPtreq, NULL, NULL);

	/* NOTE(review): presumably makes lookups of indexes that were never
	 * remapped yield CORK_NIL -- confirm against htable.h */
	hashTableSetValueForUnknownKey(foster_of, HT_INT_TO_PTR(CORK_NIL), NULL);

	/* Pass 1: shift the kinds and collect the entries that fell off. */
	for (int i = 0; i < n_entries; i++)
	{
		tagEntryInfo *e = getEntryInCorkQueue(i);

		if (e == NULL)
			continue;
		if (e->kindIndex < baseKind || e->kindIndex >= SECTION_COUNT)
			continue;

		e->kindIndex += shift;
		if (e->kindIndex < SECTION_COUNT)
			continue;

		markTagPlaceholder(e, true);

		int foster_parent = getFosterEntry(e, shift);
		Assert (foster_parent != CORK_NIL);
		hashTablePutItem(foster_of, HT_INT_TO_PTR(i),
						 HT_INT_TO_PTR(foster_parent));
	}

	/* Pass 2: re-scope the tags whose scope is one of those entries. */
	for (int i = 0; i < n_entries; i++)
	{
		tagEntryInfo *e = getEntryInCorkQueue(i);

		if (e == NULL || e->extensionFields.scopeIndex == CORK_NIL)
			continue;

		void *foster = hashTableGetItem (foster_of,
										 HT_INT_TO_PTR(e->extensionFields.scopeIndex));
		if (HT_PTR_TO_INT(foster) != CORK_NIL)
			e->extensionFields.scopeIndex = HT_PTR_TO_INT(foster);
	}

	hashTableDelete(foster_of);
}

/*
 * Corrects the kinds assigned while scanning, once the counts are known.
 *
 * - More than one section at the first level: the first level is not a
 *   document title, so every section kind is shifted down by two.
 * - Exactly one at the first level but more than one at the second: the
 *   second level is not a subtitle, so the kinds from K_SUBTITLE on are
 *   shifted down by one.
 * - Otherwise nothing is changed.
 */
static void adjustSectionKinds(struct sectionTracker section_tracker[])
{
	const int n_titles = section_tracker[K_TITLE].count;
	const int n_subtitles = section_tracker[K_SUBTITLE].count;

	if (n_titles > 1)
		shiftKinds(2, K_TITLE);
	else if (n_titles == 1 && n_subtitles > 1)
		shiftKinds(1, K_SUBTITLE);
}

/*
 * Replaces the cork-index scope reference of E with an inline copy of the
 * kind and name of the entry at PARENT_INDEX.  E is left untouched when
 * there is no such entry.
 */
static void inlineTagScope(tagEntryInfo *e, int parent_index)
{
	const tagEntryInfo *const scope = getEntryInCorkQueue (parent_index);

	if (scope == NULL)
		return;

	e->extensionFields.scopeKindIndex = scope->kindIndex;
	e->extensionFields.scopeName = eStrdup(scope->name);
	e->extensionFields.scopeIndex = CORK_NIL;
}

/*
 * Converts the scope of every queued tag from a cork-index reference into
 * an inline kind/name pair naming only the direct parent.
 */
static void inlineScopes (void)
{
	/* TODO
	   Leaving the scope as a cork index makes the scope information
	   appear in fully qualified form.  Do users want the fully qualified
	   form?
	   --- ./Units/rst.simple.d/expected.tags	2015-12-18 01:32:35.574255617 +0900
	   +++ /home/yamato/var/ctags-github/Units/rst.simple.d/FILTERED.tmp	2016-05-05 03:05:38.165604756 +0900
	   @@ -5,2 +5,2 @@
	   -Subsection 1.1.1	input.rst	/^Subsection 1.1.1$/;"	S	section:Section 1.1
	   -Subsubsection 1.1.1.1	input.rst	/^Subsubsection 1.1.1.1$/;"	t	subsection:Subsection 1.1.1
	   +Subsection 1.1.1	input.rst	/^Subsection 1.1.1$/;"	S	section:Chapter 1.Section 1.1
	   +Subsubsection 1.1.1.1	input.rst	/^Subsubsection 1.1.1.1$/;"	t	subsection:Chapter 1.Section 1.1.Subsection 1.1.1
	*/
	const size_t n_entries = countEntryInCorkQueue();

	for (int i = 0; i < n_entries; i++)
	{
		tagEntryInfo *e = getEntryInCorkQueue(i);

		if (e == NULL)
			continue;

		const int scope = e->extensionFields.scopeIndex;
		if (scope != CORK_NIL)
			inlineTagScope(e, scope);
	}
}

/*
 * Parser entry point: reads the input line by line and emits tags for
 * sections, hyperlink targets, citations and substitution definitions.
 *
 * A section is recognized when a line made of one repeated punctuation
 * character (the underline) follows a line of text (the title candidate,
 * kept in `name`).  An adornment line seen while no title candidate is
 * held is remembered as a possible overline.
 */
static void findRstTags (void)
{
	vString *name = vStringNew ();	/* title candidate: text of the previous line */
	MIOPos filepos;			/* input position recorded when `name` was stored */
	const unsigned char *line;
	const unsigned char *markup_line;
	struct sectionTracker section_tracker[SECTION_COUNT];
	struct olineTracker overline;

	memset(&filepos, 0, sizeof(filepos));
	memset(section_tracker, 0, sizeof section_tracker);
	overline_clear(&overline);
	nestingLevels = nestingLevelsNew(0);

	while ((line = readLineFromInputFile ()) != NULL)
	{
		/*
		 * Explicit markup lines ("..  X").  When a tag could be made the
		 * line is consumed; otherwise it falls through and is handled
		 * like any other line.
		 */
		if ((markup_line = is_markup_line (line, '_')) != NULL)
		{
			overline_clear(&overline);
			/* Handle .. _target:
			 * http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets
			 */
			if (capture_markup (markup_line, ':', K_TARGET) != CORK_NIL)
			{
				vStringClear (name);
				continue;
			}
		}
		else if ((markup_line = is_markup_line (line, '[')) != NULL)
		{
			overline_clear(&overline);
			/* Handle .. [citation]
			 * https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#citations
			 */
			if (capture_markup (markup_line, ']', K_CITATION) != CORK_NIL)
			{
				vStringClear (name);
				continue;
			}
		}
		else if ((markup_line = is_markup_line (line, '|')) != NULL)
		{
			overline_clear(&overline);
			/* Handle .. |substitute definition|
			 * https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#substitution-definitions
			 */
			if (capture_markup (markup_line, '|', K_SUBSTDEF) != CORK_NIL)
			{
				vStringClear (name);
				continue;
			}
		}

		int line_len = strlen((const char*) line);
		int name_len_bytes = vStringLength(name);
		/* FIXME: this isn't right, actually we need the real display width,
		 * taking into account double-width characters and stuff like that.
		 * But duh. */
		int name_len = utf8_strlen(vStringValue(name), name_len_bytes);

		/* if the name doesn't look like UTF-8, assume one-byte charset */
		if (name_len < 0)
			name_len = name_len_bytes;

		/* overline may come after an empty line (or the beginning of the file). */
		if (name_len_bytes == 0 && line_len > 0 &&
			ispunct(line[0]) && issame((const char*) line))
		{
			overline_set(&overline, *line, line_len);
			continue;
		}

		/* underlines must be the same length or more */
		if (line_len >= name_len && name_len > 0 &&
			ispunct(line[0]) && issame((const char*) line))
		{
			char c = line[0];
			/* the section counts as overlined only when the pending overline
			 * uses the same character and has the same length as the underline */
			bool o = (overline.c == c && overline.len == line_len);
			int kind = get_kind(c, o, section_tracker);

			overline_clear(&overline);

			/* kind < 0: all section levels are taken by other styles; the
			 * line then falls through and is handled as ordinary text */
			if (kind >= 0)
			{
				makeSectionRstTag(name, kind, filepos, c, o);
				vStringClear(name);
				continue;
			}
		}

		if (has_overline(&overline))
		{
			if (name_len > 0)
			{
				/*
				 * Though we saw an overline and a section title text,
				 * we cannot find the associated underline.
				 * In that case, we must reset the state of tracking
				 * overline.
				 */
				overline_clear(&overline);
			}

			/*
			 * We saw an overline. The line is a candidate
			 * for the section title text. Skip the leading whitespace.
			 */
			while (isspace(*line))
				line++;
		}

		/* Remember this line as the title candidate for the next iteration.
		 * A line that still starts with whitespace is not a candidate and
		 * leaves `name` empty. */
		vStringClear (name);
		if (!isspace(*line))
		{
			vStringCatS(name, (const char*)line);
			vStringStripTrailing (name);
			filepos = getInputFilePosition();
		}
	}
	/* Force popping all nesting levels */
	getNestingLevel (K_EOF);
	vStringDelete (name);
	nestingLevelsFree(nestingLevels);

	/* post-process the queued tags now that the section counts are known */
	adjustSectionKinds(section_tracker);
	inlineScopes();
}

/*
 * Builds the parser definition for reStructuredText: name, aliases, file
 * extensions, kind and field tables, and the parsing callback.  The cork
 * queue is requested because findRstTags() revises tags after they have
 * been queued.
 */
extern parserDefinition* RstParser (void)
{
	static const char *const aliases[] = {
		"rst",					/* The name of emacs's mode */
		NULL
	};
	static const char *const extensions [] = { "rest", "reST", "rst", NULL };

	parserDefinition* const def = parserNew ("ReStructuredText");

	/* how input files are matched to this parser */
	def->aliases = aliases;
	def->extensions = extensions;

	/* what can be tagged */
	def->kindTable = RstKinds;
	def->kindCount = ARRAY_SIZE (RstKinds);
	def->fieldTable = RstFields;
	def->fieldCount = ARRAY_SIZE (RstFields);

	/* how tagging is done */
	def->parser = findRstTags;
	def->useCork = CORK_QUEUE;

	return def;
}