1*bc29a326SJiří Techet /*
2*bc29a326SJiří Techet * Copyright (c) 2009, Eric Forgeot
3*bc29a326SJiří Techet * Copyright (c) 2014, Colomban Wendling <colomban@geany.org>
4*bc29a326SJiří Techet *
5*bc29a326SJiří Techet * Based on work by Jon Strait
6*bc29a326SJiří Techet *
7*bc29a326SJiří Techet * This source code is released for free distribution under the terms of the
 * GNU General Public License version 2 or (at your option) any later version.
9*bc29a326SJiří Techet *
10*bc29a326SJiří Techet * This module contains functions for generating tags for Txt2tags files
11*bc29a326SJiří Techet * (https://en.wikipedia.org/wiki/Txt2tags).
12*bc29a326SJiří Techet */
13*bc29a326SJiří Techet
14*bc29a326SJiří Techet /*
15*bc29a326SJiří Techet * INCLUDE FILES
16*bc29a326SJiří Techet */
17*bc29a326SJiří Techet #include "general.h" /* must always come first */
18*bc29a326SJiří Techet
19*bc29a326SJiří Techet #include <ctype.h>
20*bc29a326SJiří Techet #include <string.h>
21*bc29a326SJiří Techet
22*bc29a326SJiří Techet #include "parse.h"
23*bc29a326SJiří Techet #include "read.h"
24*bc29a326SJiří Techet #include "nestlevel.h"
25*bc29a326SJiří Techet #include "vstring.h"
26*bc29a326SJiří Techet #include "routines.h"
27*bc29a326SJiří Techet #include "entry.h"
28*bc29a326SJiří Techet
29*bc29a326SJiří Techet
30*bc29a326SJiří Techet #define SCOPE_SEPARATOR "\"\""
31*bc29a326SJiří Techet
32*bc29a326SJiří Techet /*
33*bc29a326SJiří Techet * DATA DEFINITIONS
34*bc29a326SJiří Techet */
35*bc29a326SJiří Techet
/* Tag kinds emitted by this parser; K_SECTION is the only one and is what
 * findTxt2tagsTags() passes down to initTagEntry(). */
typedef enum {
	K_SECTION = 0
} Txt2tagsKind;
39*bc29a326SJiří Techet
40*bc29a326SJiří Techet static scopeSeparator Txt2TagsSeparators [] = {
41*bc29a326SJiří Techet { KIND_WILDCARD_INDEX, SCOPE_SEPARATOR }
42*bc29a326SJiří Techet };
43*bc29a326SJiří Techet
44*bc29a326SJiří Techet static kindDefinition Txt2tagsKinds[] = {
45*bc29a326SJiří Techet { true, 's', "section", "sections",
46*bc29a326SJiří Techet ATTACH_SEPARATORS(Txt2TagsSeparators) },
47*bc29a326SJiří Techet };
48*bc29a326SJiří Techet
/* Per-nesting-level payload: the title depth stored for that level. */
struct nestingLevelUserData {
	int indentation;
};

/* Lvalue access to the depth stored in the user data of nesting level `nl`. */
#define NL_INDENTATION(nl) \
	(((struct nestingLevelUserData *) nestingLevelGetUserData (nl))->indentation)
53*bc29a326SJiří Techet
54*bc29a326SJiří Techet /*
55*bc29a326SJiří Techet * FUNCTION DEFINITIONS
56*bc29a326SJiří Techet */
57*bc29a326SJiří Techet
/*
 * Builds and emits a tag entry called `name` with kind `type`.
 *
 * If `nls` currently has a nesting level, that level's cork index is stored
 * as the scope index of the new entry.  The return value is whatever
 * makeTagEntry() returns for the entry.
 */
static int makeTxt2tagsTag (const vString* const name,
                            const NestingLevels *const nls,
                            Txt2tagsKind type)
{
	tagEntryInfo entry;
	NestingLevel *parent;

	initTagEntry (&entry, vStringValue (name), type);

	parent = nestingLevelsGetCurrent (nls);
	if (parent != NULL)
		entry.extensionFields.scopeIndex = parent->corkIndex;

	return makeTagEntry (&entry);
}
72*bc29a326SJiří Techet
/*
 * Tells whether `line` is a separator bar: optional leading whitespace, a run
 * of at least 20 characters taken from '=', '-' and '_' (freely mixed), then
 * optional trailing whitespace up to the end of the string.
 * Roughly: ^\s*[=_-]{20,}\s*$  (whitespace is whatever isspace() accepts)
 */
static bool isTxt2tagsLine (const unsigned char *line)
{
	const unsigned char *cursor = line;
	unsigned int barLength = 0;

	while (isspace (*cursor))
		cursor++;

	while (*cursor == '=' || *cursor == '-' || *cursor == '_')
	{
		cursor++;
		barLength++;
	}

	while (isspace (*cursor))
		cursor++;

	/* anything left over means this is not a pure bar line */
	if (*cursor != '\0')
		return false;

	return barLength >= 20;
}
85*bc29a326SJiří Techet
/*
 * Tries to read `line` as a Txt2tags title such as "== Name ==" (plain) or
 * "++ Name ++" (numbered), optionally followed by a "[label]".
 *
 * On success the title text is copied into `title`, the delimiter length is
 * stored in `*depth_` and true is returned.  On failure false is returned and
 * `title` is left untouched; note that `*depth_` may already have been
 * written if the opening delimiter was valid but the rest of the line was not.
 */
static bool parseTxt2tagsTitle (const unsigned char *line,
                                vString *const title,
                                int *const depth_)
{
	const int maxDepth = 5; /* longest delimiter run accepted */
	const unsigned char *text = line;
	const unsigned char *last;
	unsigned char marker;
	int unmatched;

	/* leading spaces are skipped, but not tabs (probably because they
	 * create quotes) */
	while (*text == ' ')
		text++;

	/* '=' opens a normal title, '+' a numbered one */
	marker = *text;
	if (marker != '=' && marker != '+')
		return false;

	/* measure the opening delimiter, reading at most one character past the
	 * limit so that an over-long run can be detected below */
	for (unmatched = 0; *text == marker && unmatched <= maxDepth; unmatched++)
		text++;

	while (isspace (*text))
		text++;

	if (unmatched > maxDepth) /* invalid */
		return false;

	*depth_ = unmatched;

	/* walk backwards from the last character to locate the closing delimiter */
	last = text + strlen ((const char *) text) - 1;
	while (last > text && isspace (*last))
		last--;

	/* step over an optional trailing label: \[[A-Za-z0-9_-]+\] */
	if (*last == ']')
	{
		for (last--;
		     last > text && (isalnum (*last) || *last == '_' || *last == '-');
		     last--)
			;
		if (*last != '[') /* invalid */
			return false;
		last--;
	}

	/* each closing delimiter character cancels one opening character */
	for (; last > text && *last == marker && unmatched >= 0; last--)
		unmatched--;

	while (last > text && isspace (*last))
		last--;
	last++; /* now one past the final character of the name */

	/* reject mismatched opening/closing delimiters and empty names */
	if (unmatched != 0 || last <= text)
		return false;

	vStringNCopyS (title, (const char *) text, last - text);
	return true;
}
146*bc29a326SJiří Techet
findTxt2tagsTags(void)147*bc29a326SJiří Techet static void findTxt2tagsTags (void)
148*bc29a326SJiří Techet {
149*bc29a326SJiří Techet NestingLevels *nls = nestingLevelsNew(sizeof(struct nestingLevelUserData));
150*bc29a326SJiří Techet vString *name = vStringNew();
151*bc29a326SJiří Techet const unsigned char *line;
152*bc29a326SJiří Techet
153*bc29a326SJiří Techet while ((line = readLineFromInputFile()) != NULL)
154*bc29a326SJiří Techet {
155*bc29a326SJiří Techet int depth;
156*bc29a326SJiří Techet
157*bc29a326SJiří Techet if (isTxt2tagsLine(line))
158*bc29a326SJiří Techet ; /* skip not to improperly match titles */
159*bc29a326SJiří Techet else if (parseTxt2tagsTitle(line, name, &depth))
160*bc29a326SJiří Techet {
161*bc29a326SJiří Techet NestingLevel *nl = nestingLevelsGetCurrent(nls);
162*bc29a326SJiří Techet int r;
163*bc29a326SJiří Techet
164*bc29a326SJiří Techet while (nl && NL_INDENTATION(nl) >= depth)
165*bc29a326SJiří Techet {
166*bc29a326SJiří Techet nestingLevelsPop(nls);
167*bc29a326SJiří Techet nl = nestingLevelsGetCurrent(nls);
168*bc29a326SJiří Techet }
169*bc29a326SJiří Techet
170*bc29a326SJiří Techet r = makeTxt2tagsTag(name, nls, K_SECTION);
171*bc29a326SJiří Techet nestingLevelsPush(nls, r);
172*bc29a326SJiří Techet nl = nestingLevelsGetCurrent(nls);
173*bc29a326SJiří Techet NL_INDENTATION(nl) = depth;
174*bc29a326SJiří Techet }
175*bc29a326SJiří Techet }
176*bc29a326SJiří Techet vStringDelete (name);
177*bc29a326SJiří Techet nestingLevelsFree(nls);
178*bc29a326SJiří Techet }
179*bc29a326SJiří Techet
Txt2tagsParser(void)180*bc29a326SJiří Techet extern parserDefinition* Txt2tagsParser (void)
181*bc29a326SJiří Techet {
182*bc29a326SJiří Techet static const char *const patterns [] = { "*.t2t", NULL };
183*bc29a326SJiří Techet static const char *const extensions [] = { "t2t", NULL };
184*bc29a326SJiří Techet parserDefinition* const def = parserNew ("Txt2tags");
185*bc29a326SJiří Techet
186*bc29a326SJiří Techet def->kindTable = Txt2tagsKinds;
187*bc29a326SJiří Techet def->kindCount = ARRAY_SIZE (Txt2tagsKinds);
188*bc29a326SJiří Techet def->patterns = patterns;
189*bc29a326SJiří Techet def->extensions = extensions;
190*bc29a326SJiří Techet def->parser = findTxt2tagsTags;
191*bc29a326SJiří Techet def->useCork = CORK_QUEUE;
192*bc29a326SJiří Techet return def;
193*bc29a326SJiří Techet }
194*bc29a326SJiří Techet
195