xref: /OpenGrok/opengrok-indexer/src/main/jflex/analysis/tcl/Tcl.lexh (revision d219b4cea555a12b602d2d5518daa22134ad4879)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * See LICENSE.txt included in this distribution for the specific
9 * language governing permissions and limitations under the License.
10 *
11 * When distributing Covered Code, include this CDDL HEADER in each
12 * file and include the License file at LICENSE.txt.
13 * If applicable, add the following below this CDDL HEADER, with the
14 * fields enclosed by brackets "[]" replaced with your own identifying
15 * information: Portions Copyright [yyyy] [name of copyright owner]
16 *
17 * CDDL HEADER END
18 */
19
20/*
21 * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
22 * Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
23 *
24 * Copyright © 1993 The Regents of the University of California.
25 * Copyright © 1994-1996 Sun Microsystems, Inc.
26 * Copyright © 1995-1997 Roger E. Critchlow Jr.
27 */
28
/*
 * Numeric literal: digits "." digits, or a run of digits, or "#" followed by
 * one of b/o/x (either case) and one or more hexadecimal digits.
 * NOTE(review): Tcl itself spells radix literals 0x/0o/0b, not "#x" -- confirm
 * that the "#" form is intended here.
 */
29Number = ([0-9]+\.[0-9]+ | [0-9]+ | "#" [boxBOX] [0-9a-fA-F]+)
30
31/*
32 * [1] Commands. ... Semi-colons and newlines are command separators unless
33 * quoted as described below.
34 *
35 * [3] Words. Words of a command are separated by white space (except for
36 * newlines, which are command separators).
37 * [4] Double quotes. If the first character of a word is double-quote (``"'')
38 * then the word is terminated by the next double-quote character.
39 * [5] Braces. If the first character of a word is an open brace (``{'') then
40 * the word is terminated by the matching close brace (``}'').
41 *  N.b. OpenGrok handles [4] and [5] as special matches distinct from {Word}.
42 *
43 * [9] Comments. If a hash character (``#'') appears at a point where Tcl is
44 * expecting the first character of the first word of a command, then the hash
45 * character and the characters that follow it, up through the next newline,
46 * are treated as a comment and ignored. The comment character only has
47 * significance when it appears at the beginning of a command.
48 *
49 * N.b. this "OrdinaryWord" is for OpenGrok's purpose of symbol tokenization
50 * and deviates from the above definitions by treating backslash escapes as
51 * word breaking, by precluding some characters from starting words, and by
52 * precluding most of the same characters from continuing words. E.g., hyphen
53 * is not allowed by OpenGrok to start OrdinaryWord but can be present afterward.
54 */
/*
 * First character: any non-whitespace except  - , = # " } { ] [ ) ( \
 * Later characters (zero or more): any non-whitespace except  # " } { ] [ ) ( \
 * So hyphen, comma, and equals may continue a word but may not start one.
 */
55OrdinaryWord = [\S--\-,=#\"\}\{\]\[\)\(\\] [\S--#\"\}\{\]\[\)\(\\]*
56
57/*
58 * [7] Variable substitution.
59 *
60 * $name
61 *     Name is the name of a scalar variable; the name is a sequence of one or
62 *     more characters that are a letter, digit, underscore, or namespace
63 *     separators (two or more colons).
64 */
/* Scalar reference: "$" followed by one or more name units. */
65Varsub1 = "$" {name_unit}+
/*
 * One unit of a variable name: a letter, digit, or underscore, or a namespace
 * separator made of two or more colons.
 */
66name_unit = ([\p{Letter}\p{Digit}_] | ":" ":"+)
67/*
68 * $name(index)
69 *     Name gives the name of an array variable and index gives the name of an
70 *     element within that array. Name must contain only letters, digits,
71 *     underscores, and namespace separators, and may be an empty string.
72 */
/*
 * Array element reference: "$", an optional array name, then a parenthesized
 * index. This pattern accepts only name units inside the parentheses.
 */
73Varsub2 = "$" {name_unit}* "(" {name_unit}+ ")"
74/*
75 * ${name}
76 *     Name is the name of a scalar variable. It may contain any characters
77 *     whatsoever except for close braces.
78 */
/* Braced reference: "${", one or more characters other than "}", then "}". */
79Varsub3 = "${" [^\}]+ "}"
80
81/*
82 * [8] Backslash substitution.
83 * Backslash plus a character, where ... in all cases but [for the characters]
84 * described below, the backslash is dropped and the following character is
85 * treated as an ordinary character and included in the word.
86 *
87 * Special cases:
88 * a,f,b,n,r,t,v,backslash;
89 * \<newline>whiteSpace;
90 * \ooo The digits ooo (one, two, or three of them);
91 * \xhh The hexadecimal digits hh .... Any number of hexadecimal digits may be
92 *     present;
93 * \uhhhh The hexadecimal digits hhhh (one, two, three, or four of them)
94 *
95 * "Backslash substitution is not performed on words enclosed in braces, except
96 * for backslash-newline as described above."
97 */
/*
 * Backslash_sub: a backslash followed by one of, in order:
 *   - one of a, f, b, n, r, t, v, or another backslash;
 *   - one to three \p{Number} characters;
 *   - "x" and one or more hexadecimal digits;
 *   - "u" and one to four hexadecimal digits;
 *   - any other single character, i.e. one that is neither a lead character
 *     of the alternatives above nor a newline.
 * NOTE(review): \p{Number} accepts any Unicode number character, not only the
 * octal digits that Tcl's \ooo form uses -- confirm this is intended.
 */
98Backslash_sub = [\\] ([afbnrtv\\] | \p{Number}{1,3} | [x][0-9a-fA-F]+ |
99    [u][0-9a-fA-F]{1,4} | [[^]--[afbnrtv\n\p{Number}xu\\]])
/* Backslash_nl: a backslash, a newline, then one or more whitespace characters. */
100Backslash_nl = [\\] \n\s+
101
/* The operator tokens "*", "&&", and "||". */
102WordOperators = ("&&" | "||" | "*")
103