xref: /Universal-ctags/parsers/txt2tags.c (revision bc29a326c2717ce7dcaef19f811d6e5738ef98a2)
/*
*   Copyright (c) 2009, Eric Forgeot
*   Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
*
*   Based on work by Jon Strait
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License version 2 or (at your option) any later version.
*
*   This module contains functions for generating tags for Txt2tags files
*   (https://en.wikipedia.org/wiki/Txt2tags).
*/
13*bc29a326SJiří Techet 
14*bc29a326SJiří Techet /*
15*bc29a326SJiří Techet *   INCLUDE FILES
16*bc29a326SJiří Techet */
17*bc29a326SJiří Techet #include "general.h"	/* must always come first */
18*bc29a326SJiří Techet 
19*bc29a326SJiří Techet #include <ctype.h>
20*bc29a326SJiří Techet #include <string.h>
21*bc29a326SJiří Techet 
22*bc29a326SJiří Techet #include "parse.h"
23*bc29a326SJiří Techet #include "read.h"
24*bc29a326SJiří Techet #include "nestlevel.h"
25*bc29a326SJiří Techet #include "vstring.h"
26*bc29a326SJiří Techet #include "routines.h"
27*bc29a326SJiří Techet #include "entry.h"
28*bc29a326SJiří Techet 
29*bc29a326SJiří Techet 
/* Scope separator string (two double-quote characters) that
 * Txt2TagsSeparators registers for every kind of this parser. */
#define SCOPE_SEPARATOR "\"\""
31*bc29a326SJiří Techet 
32*bc29a326SJiří Techet /*
33*bc29a326SJiří Techet *   DATA DEFINITIONS
34*bc29a326SJiří Techet */
35*bc29a326SJiří Techet 
/* Kinds of tags produced by this parser.  The values are passed to
 * initTagEntry() as the kind of the tag, so they must stay in sync with the
 * order of the entries in Txt2tagsKinds. */
typedef enum {
	K_SECTION = 0
} Txt2tagsKind;
39*bc29a326SJiří Techet 
40*bc29a326SJiří Techet static scopeSeparator Txt2TagsSeparators [] = {
41*bc29a326SJiří Techet 	{ KIND_WILDCARD_INDEX, SCOPE_SEPARATOR }
42*bc29a326SJiří Techet };
43*bc29a326SJiří Techet 
44*bc29a326SJiří Techet static kindDefinition Txt2tagsKinds[] = {
45*bc29a326SJiří Techet 	{ true, 's', "section", "sections",
46*bc29a326SJiří Techet 	  ATTACH_SEPARATORS(Txt2TagsSeparators) },
47*bc29a326SJiří Techet };
48*bc29a326SJiří Techet 
/* Per-nesting-level user data: the depth (delimiter length) of the title that
 * opened the level. */
struct nestingLevelUserData {
	int indentation;
};

/* Accesses the stored depth of nesting level NL.  The expansion is an lvalue,
 * so it can be both read and assigned to. */
#define NL_INDENTATION(nl) \
	(((struct nestingLevelUserData *) nestingLevelGetUserData (nl))->indentation)
53*bc29a326SJiří Techet 
54*bc29a326SJiří Techet /*
55*bc29a326SJiří Techet *   FUNCTION DEFINITIONS
56*bc29a326SJiří Techet */
57*bc29a326SJiří Techet 
/*
 * Creates a tag called NAME of kind TYPE.
 *
 * When NLS has a current nesting level, the cork index stored in that level
 * is recorded as the scope of the new tag.
 *
 * Returns whatever makeTagEntry() returns for the new tag.
 */
static int makeTxt2tagsTag (const vString* const name,
                            const NestingLevels *const nls,
                            Txt2tagsKind type)
{
	tagEntryInfo tag;
	const NestingLevel *parent;

	initTagEntry (&tag, vStringValue (name), type);

	parent = nestingLevelsGetCurrent (nls);
	if (parent)
		tag.extensionFields.scopeIndex = parent->corkIndex;

	return makeTagEntry (&tag);
}
72*bc29a326SJiří Techet 
/*
 * Tells whether LINE is a separator bar, i.e. whether it
 * matches: ^\s*[=_-]{20,}\s*$
 *
 * The three bar characters may be mixed freely, and any whitespace accepted
 * by isspace() (not only plain spaces) may surround the bar.
 *
 * LINE must be NUL-terminated.  Returns true for a bar, false otherwise.
 */
static bool isTxt2tagsLine (const unsigned char *line)
{
	enum { MIN_BAR_LENGTH = 20 }; /* shortest run that counts as a bar */
	unsigned int barLength = 0;

	while (isspace (*line))
		line++;

	while (*line == '=' || *line == '-' || *line == '_')
	{
		line++;
		barLength++;
	}

	while (isspace (*line))
		line++;

	/* anything left after the trailing whitespace disqualifies the line */
	return barLength >= MIN_BAR_LENGTH && *line == '\0';
}
85*bc29a326SJiří Techet 
/*
 * Parses LINE as a title line.
 *
 * A title is made of optional leading spaces, a start delimiter of 1 to
 * MAX_TITLE_DEPTH '=' (normal title) or '+' (numbered title) characters, the
 * title text, an end delimiter identical to the start one, and an optional
 * trailing label in square brackets.
 *
 * On success, copies the title text (without the surrounding whitespace)
 * into TITLE, stores the delimiter length in *DEPTH_ and returns true.
 * On failure, returns false and leaves TITLE and *DEPTH_ untouched.
 *
 * LINE must be NUL-terminated.
 */
static bool parseTxt2tagsTitle (const unsigned char *line,
                                vString *const title,
                                int *const depth_)
{
	enum { MAX_TITLE_DEPTH = 5 }; /* maximum length of a title delimiter */
	unsigned char delim;
	int depth = 0;
	int unmatched;
	const unsigned char *end;

	/* skip leading spaces, but no tabs (probably because they create quotes) */
	while (*line == ' ')
		line++;

	/* normal/numbered titles */
	if (*line != '=' && *line != '+')
		return false;

	delim = *line;

	/* measure the start delimiter, reading at most one character more than
	 * allowed so an over-long delimiter can be told apart */
	while (*line == delim && depth <= MAX_TITLE_DEPTH)
	{
		line++;
		depth++;
	}
	if (depth > MAX_TITLE_DEPTH) /* invalid */
		return false;

	while (isspace (*line))
		line++;

	/* nothing follows the start delimiter: neither a name nor an end
	 * delimiter can be present */
	if (*line == '\0')
		return false;

	/* find the end delimiter, walking backwards from the end of the line */
	end = line + strlen ((const char *) line) - 1;
	while (end > line && isspace (*end))
		end--;

	/* skip a possible label: \[[A-Za-z0-9_-]*\] */
	if (*end == ']')
	{
		end--;
		while (end > line && (isalnum (*end) || *end == '_' || *end == '-'))
			end--;
		if (*end != '[') /* invalid */
			return false;
		end--;
	}

	/* consume the end delimiter; the counter is allowed to reach -1 so that
	 * an end delimiter longer than the start one is detected below */
	unmatched = depth;
	while (end > line && *end == delim && unmatched >= 0)
	{
		unmatched--;
		end--;
	}
	while (end > line && isspace (*end))
		end--;
	end++; /* one past the last character of the name */

	/* if start and end delimiters are not identical, or the name is empty */
	if (unmatched != 0 || (end - line) <= 0)
		return false;

	vStringNCopyS (title, (const char *) line, end - line);
	*depth_ = depth;
	return true;
}
146*bc29a326SJiří Techet 
findTxt2tagsTags(void)147*bc29a326SJiří Techet static void findTxt2tagsTags (void)
148*bc29a326SJiří Techet {
149*bc29a326SJiří Techet 	NestingLevels *nls = nestingLevelsNew(sizeof(struct nestingLevelUserData));
150*bc29a326SJiří Techet 	vString *name = vStringNew();
151*bc29a326SJiří Techet 	const unsigned char *line;
152*bc29a326SJiří Techet 
153*bc29a326SJiří Techet 	while ((line = readLineFromInputFile()) != NULL)
154*bc29a326SJiří Techet 	{
155*bc29a326SJiří Techet 		int depth;
156*bc29a326SJiří Techet 
157*bc29a326SJiří Techet 		if (isTxt2tagsLine(line))
158*bc29a326SJiří Techet 			; /* skip not to improperly match titles */
159*bc29a326SJiří Techet 		else if (parseTxt2tagsTitle(line, name, &depth))
160*bc29a326SJiří Techet 		{
161*bc29a326SJiří Techet 			NestingLevel *nl = nestingLevelsGetCurrent(nls);
162*bc29a326SJiří Techet 			int r;
163*bc29a326SJiří Techet 
164*bc29a326SJiří Techet 			while (nl && NL_INDENTATION(nl) >= depth)
165*bc29a326SJiří Techet 			{
166*bc29a326SJiří Techet 				nestingLevelsPop(nls);
167*bc29a326SJiří Techet 				nl = nestingLevelsGetCurrent(nls);
168*bc29a326SJiří Techet 			}
169*bc29a326SJiří Techet 
170*bc29a326SJiří Techet 			r = makeTxt2tagsTag(name, nls, K_SECTION);
171*bc29a326SJiří Techet 			nestingLevelsPush(nls, r);
172*bc29a326SJiří Techet 			nl = nestingLevelsGetCurrent(nls);
173*bc29a326SJiří Techet 			NL_INDENTATION(nl) = depth;
174*bc29a326SJiří Techet 		}
175*bc29a326SJiří Techet 	}
176*bc29a326SJiří Techet 	vStringDelete (name);
177*bc29a326SJiří Techet 	nestingLevelsFree(nls);
178*bc29a326SJiří Techet }
179*bc29a326SJiří Techet 
Txt2tagsParser(void)180*bc29a326SJiří Techet extern parserDefinition* Txt2tagsParser (void)
181*bc29a326SJiří Techet {
182*bc29a326SJiří Techet 	static const char *const patterns [] = { "*.t2t", NULL };
183*bc29a326SJiří Techet 	static const char *const extensions [] = { "t2t", NULL };
184*bc29a326SJiří Techet 	parserDefinition* const def = parserNew ("Txt2tags");
185*bc29a326SJiří Techet 
186*bc29a326SJiří Techet 	def->kindTable = Txt2tagsKinds;
187*bc29a326SJiří Techet 	def->kindCount = ARRAY_SIZE (Txt2tagsKinds);
188*bc29a326SJiří Techet 	def->patterns = patterns;
189*bc29a326SJiří Techet 	def->extensions = extensions;
190*bc29a326SJiří Techet 	def->parser = findTxt2tagsTags;
191*bc29a326SJiří Techet 	def->useCork = CORK_QUEUE;
192*bc29a326SJiří Techet 	return def;
193*bc29a326SJiří Techet }
194*bc29a326SJiří Techet 
195