1 /*
2 * Copyright (c) 2002-2003, Darren Hiebert
3 *
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
6 *
7 * This module contains functions for generating tags for PL/SQL language
8 * files.
9 */
10
11 /*
12 * INCLUDE FILES
13 */
14 #include "general.h" /* must always come first */
15
16 #include <ctype.h> /* to define isalpha () */
17 #ifdef DEBUG
18 #include <stdio.h>
19 #endif
20 #include <string.h>
21
22 #include "debug.h"
23 #include "entry.h"
24 #include "keyword.h"
25 #include "parse.h"
26 #include "read.h"
27 #include "routines.h"
28 #include "vstring.h"
29 #include "xtag.h"
30 #include "promise.h"
31
32 /*
33 * On-line "Oracle Database PL/SQL Language Reference":
34 * http://download.oracle.com/docs/cd/B28359_01/appdev.111/b28370/toc.htm
35 *
36 * Sample PL/SQL code is available from:
37 * http://www.orafaq.com/faqscrpt.htm#GENPLSQL
38 *
39 * On-line SQL Anywhere Documentation
40 * http://www.ianywhere.com/developer/product_manuals/sqlanywhere/index.html
41 */
42
43 /*
44 * MACROS
45 */
46 #define isType(token,t) (bool) ((token)->type == (t))
47 #define isKeyword(token,k) (bool) ((token)->keyword == (k))
48 #define isReservedWord(token) (SqlReservedWord[(token)->keyword].fn \
49 ?(bool)SqlReservedWord[(token)->keyword].fn(token) \
50 :SqlReservedWord[(token)->keyword].bit)
51 #define isIdentChar1(c) \
52 /*
53 * Other databases are less restrictive on the first character of
54 * an identifier.
55 * isIdentChar1 is used to identify the first character of an
56 * identifier, so we are removing some restrictions.
57 */ \
58 (isalpha (c) || (c) == '@' || (c) == '_' )
59 #define isIdentChar(c) \
60 (isalpha (c) || isdigit (c) || (c) == '$' || \
61 (c) == '@' || (c) == '_' || (c) == '#')
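/*
 * For illustration (the names are invented, not taken from any particular
 * dialect): identifiers such as "v_sync_user_id" or "@fiscal_year" start with
 * a character accepted by isIdentChar1, while '$' and '#' are only accepted
 * in later positions via isIdentChar.
 */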
62
63 /*
64 * DATA DECLARATIONS
65 */
66
67 /*
68 * Used to specify type of keyword.
69 */
70 enum eKeywordId {
71 KEYWORD_at,
72 KEYWORD_begin,
73 KEYWORD_body,
74 KEYWORD_call,
75 KEYWORD_case,
76 KEYWORD_check,
77 KEYWORD_commit,
78 KEYWORD_comment,
79 KEYWORD_constraint,
80 KEYWORD_create,
81 KEYWORD_cursor,
82 KEYWORD_database,
83 KEYWORD_datatype,
84 KEYWORD_declare,
85 KEYWORD_do,
86 KEYWORD_domain,
87 KEYWORD_drop,
88 KEYWORD_else,
89 KEYWORD_elseif,
90 KEYWORD_end,
91 KEYWORD_endif,
92 KEYWORD_event,
93 KEYWORD_exception,
94 KEYWORD_extension,
95 KEYWORD_external,
96 KEYWORD_for,
97 KEYWORD_foreign,
98 KEYWORD_from,
99 KEYWORD_function,
100 KEYWORD_go,
101 KEYWORD_handler,
102 KEYWORD_if,
103 KEYWORD_index,
104 KEYWORD_internal,
105 KEYWORD_is,
106 KEYWORD_language,
107 KEYWORD_local,
108 KEYWORD_loop,
109 KEYWORD_ml_conn,
110 KEYWORD_ml_conn_chk,
111 KEYWORD_ml_conn_dnet,
112 KEYWORD_ml_conn_java,
113 KEYWORD_ml_conn_lang,
114 KEYWORD_ml_prop,
115 KEYWORD_ml_table,
116 KEYWORD_ml_table_chk,
117 KEYWORD_ml_table_dnet,
118 KEYWORD_ml_table_java,
119 KEYWORD_ml_table_lang,
120 KEYWORD_object,
121 KEYWORD_on,
122 KEYWORD_package,
123 KEYWORD_pragma,
124 KEYWORD_inquiry_directive,
125 KEYWORD_primary,
126 KEYWORD_procedure,
127 KEYWORD_publication,
128 KEYWORD_record,
129 KEYWORD_ref,
130 KEYWORD_references,
131 KEYWORD_rem,
132 KEYWORD_result,
133 KEYWORD_return,
134 KEYWORD_returns,
135 KEYWORD_schema,
136 KEYWORD_select,
137 KEYWORD_service,
138 KEYWORD_subtype,
139 KEYWORD_synonym,
140 KEYWORD_table,
141 KEYWORD_temporary,
142 KEYWORD_then,
143 KEYWORD_trigger,
144 KEYWORD_type,
145 KEYWORD_unique,
146 KEYWORD_url,
147 KEYWORD_variable,
148 KEYWORD_view,
149 KEYWORD_when,
150 KEYWORD_while,
151 KEYWORD_with,
152 KEYWORD_without,
153 SQLKEYWORD_COUNT,
154 };
155 typedef int keywordId; /* to allow KEYWORD_NONE */
156
157 typedef enum eTokenType {
158 TOKEN_UNDEFINED,
159 TOKEN_EOF,
160 TOKEN_BLOCK_LABEL_BEGIN,
161 TOKEN_BLOCK_LABEL_END,
162 TOKEN_CHARACTER,
163 TOKEN_CLOSE_PAREN,
164 TOKEN_COLON,
165 TOKEN_SEMICOLON,
166 TOKEN_COMMA,
167 TOKEN_IDENTIFIER,
168 TOKEN_KEYWORD,
169 TOKEN_OPEN_PAREN,
170 TOKEN_OPERATOR,
171 TOKEN_OTHER,
172 TOKEN_STRING,
173 TOKEN_PERIOD,
174 TOKEN_OPEN_CURLY,
175 TOKEN_CLOSE_CURLY,
176 TOKEN_OPEN_SQUARE,
177 TOKEN_CLOSE_SQUARE,
178 TOKEN_TILDE,
179 TOKEN_FORWARD_SLASH,
180 TOKEN_EQUAL
181 } tokenType;
182
183 typedef struct sTokenInfoSQL {
184 tokenType type;
185 keywordId keyword;
186 vString * string;
187 vString * scope;
188 int scopeKind;
189 int begin_end_nest_lvl;
190 unsigned long lineNumber;
191 MIOPos filePosition;
192
193 	/* When the "guest" extra is enabled, a promise is
194 	 * always made when reading a string (literal or dollar quote).
195 	 * The lexer stores the id of the promise in this member.
196 	 * When the promise is made, the language of the guest parser
197 	 * may not be determined yet:
198 	 *
199 	 * CREATE FUNCTION ... AS ' sub code_written_in_perl {... ' LANGUAGE plperl;
200 	 *
201 	 * After reading the string, the parser may find the LANGUAGE keyword. In that
202 	 * case, the parser updates the language of the promise.
203 	 *
204 	 * This field is filled only when the `guest` extra is enabled.
205 *
206 */
207 int promise;
208 } tokenInfo;
209
210 /*
211 * DATA DEFINITIONS
212 */
213
214 static langType Lang_sql;
215
216 typedef enum {
217 SQLTAG_PLSQL_CCFLAGS,
218 SQLTAG_DOMAIN,
219 SQLTAG_FIELD,
220 SQLTAG_BLOCK_LABEL,
221 SQLTAG_PACKAGE,
222 SQLTAG_SERVICE,
223 SQLTAG_SCHEMA,
224 SQLTAG_TRIGGER,
225 SQLTAG_PUBLICATION,
226 SQLTAG_VIEW,
227 SQLTAG_DATABASE,
228 SQLTAG_CURSOR,
229 SQLTAG_PROTOTYPE,
230 SQLTAG_EVENT,
231 SQLTAG_FUNCTION,
232 SQLTAG_INDEX,
233 SQLTAG_LOCAL_VARIABLE,
234 SQLTAG_SYNONYM,
235 SQLTAG_PROCEDURE,
236 SQLTAG_RECORD,
237 SQLTAG_SUBTYPE,
238 SQLTAG_TABLE,
239 SQLTAG_VARIABLE,
240 SQLTAG_MLTABLE,
241 SQLTAG_MLCONN,
242 SQLTAG_MLPROP,
243 SQLTAG_COUNT
244 } sqlKind;
245
246 static kindDefinition SqlKinds [] = {
247 { true, 'C', "ccflag", "PLSQL_CCFLAGS" },
248 { true, 'D', "domain", "domains" },
249 { true, 'E', "field", "record fields" },
250 { true, 'L', "label", "block label" },
251 { true, 'P', "package", "packages" },
252 { true, 'R', "service", "services" },
253 { true, 'S', "schema", "schemas" },
254 { true, 'T', "trigger", "triggers" },
255 { true, 'U', "publication", "publications" },
256 { true, 'V', "view", "views" },
257 { true, 'b', "database", "database" },
258 { true, 'c', "cursor", "cursors" },
259 { false, 'd', "prototype", "prototypes" },
260 { true, 'e', "event", "events" },
261 { true, 'f', "function", "functions" },
262 { true, 'i', "index", "indexes" },
263 { false, 'l', "local", "local variables" },
264 { true, 'n', "synonym", "synonyms" },
265 { true, 'p', "procedure", "procedures" },
266 { false, 'r', "record", "records" },
267 { true, 's', "subtype", "subtypes" },
268 { true, 't', "table", "tables" },
269 { true, 'v', "variable", "variables" },
270 { true, 'x', "mltable", "MobiLink Table Scripts" },
271 { true, 'y', "mlconn", "MobiLink Conn Scripts" },
272 { true, 'z', "mlprop", "MobiLink Properties" },
273 };
274
275 static const keywordTable SqlKeywordTable [] = {
276 /* keyword keyword ID */
277 { "as", KEYWORD_is },
278 { "at", KEYWORD_at },
279 { "begin", KEYWORD_begin },
280 { "body", KEYWORD_body },
281 { "call", KEYWORD_call },
282 { "case", KEYWORD_case },
283 { "check", KEYWORD_check },
284 { "commit", KEYWORD_commit },
285 { "comment", KEYWORD_comment },
286 { "constraint", KEYWORD_constraint },
287 { "create", KEYWORD_create },
288 { "cursor", KEYWORD_cursor },
289 { "database", KEYWORD_database },
290 { "datatype", KEYWORD_datatype },
291 { "declare", KEYWORD_declare },
292 { "do", KEYWORD_do },
293 { "domain", KEYWORD_domain },
294 { "drop", KEYWORD_drop },
295 { "else", KEYWORD_else },
296 { "elseif", KEYWORD_elseif },
297 { "end", KEYWORD_end },
298 { "endif", KEYWORD_endif },
299 { "event", KEYWORD_event },
300 { "exception", KEYWORD_exception },
301 { "extension", KEYWORD_extension },
302 { "external", KEYWORD_external },
303 { "for", KEYWORD_for },
304 { "foreign", KEYWORD_foreign },
305 { "from", KEYWORD_from },
306 { "function", KEYWORD_function },
307 { "go", KEYWORD_go },
308 { "handler", KEYWORD_handler },
309 { "if", KEYWORD_if },
310 { "index", KEYWORD_index },
311 { "internal", KEYWORD_internal },
312 { "is", KEYWORD_is },
313 { "language", KEYWORD_language },
314 { "local", KEYWORD_local },
315 { "loop", KEYWORD_loop },
316 { "ml_add_connection_script", KEYWORD_ml_conn },
317 { "ml_add_dnet_connection_script", KEYWORD_ml_conn_dnet },
318 { "ml_add_dnet_table_script", KEYWORD_ml_table_dnet },
319 { "ml_add_java_connection_script", KEYWORD_ml_conn_java },
320 { "ml_add_java_table_script", KEYWORD_ml_table_java },
321 { "ml_add_lang_conn_script_chk", KEYWORD_ml_conn_chk },
322 { "ml_add_lang_connection_script", KEYWORD_ml_conn_lang },
323 { "ml_add_lang_table_script", KEYWORD_ml_table_lang },
324 { "ml_add_lang_table_script_chk", KEYWORD_ml_table_chk },
325 { "ml_add_property", KEYWORD_ml_prop },
326 { "ml_add_table_script", KEYWORD_ml_table },
327 { "object", KEYWORD_object },
328 { "on", KEYWORD_on },
329 { "package", KEYWORD_package },
330 { "pragma", KEYWORD_pragma },
331 { "primary", KEYWORD_primary },
332 { "procedure", KEYWORD_procedure },
333 { "publication", KEYWORD_publication },
334 { "record", KEYWORD_record },
335 { "ref", KEYWORD_ref },
336 { "references", KEYWORD_references },
337 { "rem", KEYWORD_rem },
338 { "result", KEYWORD_result },
339 { "return", KEYWORD_return },
340 { "returns", KEYWORD_returns },
341 { "schema", KEYWORD_schema },
342 { "select", KEYWORD_select },
343 { "service", KEYWORD_service },
344 { "subtype", KEYWORD_subtype },
345 { "synonym", KEYWORD_synonym },
346 { "table", KEYWORD_table },
347 { "temporary", KEYWORD_temporary },
348 { "then", KEYWORD_then },
349 { "trigger", KEYWORD_trigger },
350 { "type", KEYWORD_type },
351 { "unique", KEYWORD_unique },
352 { "url", KEYWORD_url },
353 { "variable", KEYWORD_variable },
354 { "view", KEYWORD_view },
355 { "when", KEYWORD_when },
356 { "while", KEYWORD_while },
357 { "with", KEYWORD_with },
358 { "without", KEYWORD_without },
359 };
360
361 const static struct keywordGroup predefinedInquiryDirective = {
362 .value = KEYWORD_inquiry_directive,
363 .addingUnlessExisting = false,
364 .keywords = {
365 /* https://docs.oracle.com/en/database/oracle/oracle-database/18/lnpls/plsql-language-fundamentals.html#GUID-3DABF5E1-AC84-448B-810F-31196991EA10 */
366 "PLSQL_LINE",
367 "PLSQL_UNIT",
368 "PLSQL_UNIT_OWNER",
369 "PLSQL_UNIT_TYPE",
370 /* https://docs.oracle.com/en/database/oracle/oracle-database/18/lnpls/overview.html#GUID-DF63BC59-22C2-4BA8-9240-F74D505D5102 */
371 "PLSCOPE_SETTINGS",
372 "PLSQL_CCFLAGS",
373 "PLSQL_CODE_TYPE",
374 "PLSQL_OPTIMIZE_LEVEL",
375 "PLSQL_WARNINGS",
376 "NLS_LENGTH_SEMANTICS",
377 "PERMIT_92_WRAP_FORMAT",
378 NULL
379 },
380 };
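/* The names above are looked up (via lookupCaseKeyword) against the text that
 * follows "$$" in parseDollarQuote () below, so that, for example,
 * "$$PLSQL_LINE" is treated as an Oracle inquiry directive rather than the
 * start of a PostgreSQL dollar-quoted string. */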
381
382 /* A table representing whether a keyword is a "reserved word" or not.
383  * A "reserved word" cannot be used as a name.
384  * See https://dev.mysql.com/doc/refman/8.0/en/keywords.html for the
385  * difference between keywords and reserved words.
386 *
387 * We will mark a keyword as a reserved word only if all the SQL dialects
388 * specify it as a reserved word.
389 */
390 struct SqlReservedWord {
391 	/* If fn is non-NULL, the value returned from fn(token) is used
392 	 * to represent whether a keyword is reserved (true) or not.
393 	 * If fn is NULL, bit is used. */
394 unsigned int bit;
395 bool (* fn) (tokenInfo *const token);
396 };
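/* For example, "check" is marked reserved because every dialect in the table
 * below reserves it, while "database" still carries the { 0 } placeholder
 * because it has not been inspected yet. "is"/"as" need the fn hook since
 * both spellings map to the same keyword id but are not reserved by the same
 * set of dialects. */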
397
398 /*
399 * MYSQL
400 * => https://dev.mysql.com/doc/refman/8.0/en/keywords.html
401 * POSTGRESQL,SQL2016,SQL2011,SQL92
402 * => https://www.postgresql.org/docs/12/sql-keywords-appendix.html
403 * ORACLE11g, PLSQL
404 * => https://docs.oracle.com/cd/B28359_01/appdev.111/b31231/appb.htm#CJHIIICD
405  * SQLANYWHERE
406 * => http://dcx.sap.com/1200/en/dbreference/alhakeywords.html <the page is gone>
407 */
408 static bool SqlReservedWordPredicatorForIsOrAs (tokenInfo *const token);
409 static struct SqlReservedWord SqlReservedWord [SQLKEYWORD_COUNT] = {
410 /*
411 	 * RESERVED_BIT: MYSQL & POSTGRESQL&SQL2016&SQL2011&SQL92 & ORACLE11g&PLSQL & SQLANYWHERE
412 	 *
413 	 * { 0 } means we have not inspected whether the keyword is reserved or not.
414 */
415 [KEYWORD_at] = {0 & 0&1&1&1 & 0&1 & 0},
416 [KEYWORD_begin] = {0 & 0&1&1&1 & 0&1 & 1},
417 [KEYWORD_body] = {0 & 0&0&0&0 & 0&1 & 0},
418 [KEYWORD_call] = {1 & 0&1&1&0 & 0&0 & 1},
419 [KEYWORD_case] = {1 & 1&1&1&1 & 0&1 & 1},
420 [KEYWORD_check] = {1 & 1&1&1&1 & 1&1 & 1},
421 	[KEYWORD_commit] = {0 & 0&1&1&1 & 0&0 & 0}, /* SQLANYWHERE:??? */
422 [KEYWORD_comment] = {0 & 0&0&0&0 & 1&1 & 1},
423 [KEYWORD_constraint] = {1 & 1&1&1&1 & 0&1 & 1},
424 [KEYWORD_create] = {1 & 1&1&1&1 & 1&1 & 1},
425 [KEYWORD_cursor] = {1 & 0&1&1&1 & 0&1 & 1},
426 [KEYWORD_database] = { 0 },
427 [KEYWORD_datatype] = {0 & 0&0&0&0 & 0&0 & 0},
428 [KEYWORD_declare] = {1 & 0&1&1&1 & 0&1 & 1},
429 [KEYWORD_do] = {0 & 1&0&0&0 & 0&1 & 1},
430 [KEYWORD_domain] = {0 & 0&0&0&1 & 0&0 & 0},
431 [KEYWORD_drop] = {1 & 0&1&1&1 & 1&1 & 1},
432 [KEYWORD_else] = {1 & 1&1&1&1 & 1&1 & 1},
433 [KEYWORD_elseif] = {1 & 0&0&0&0 & 0&0 & 1},
434 [KEYWORD_end] = {0 & 1&1&1&1 & 0&1 & 1},
435 [KEYWORD_endif] = {0 & 0&0&0&0 & 0&0 & 1},
436 [KEYWORD_event] = {0 & 0&0&0&0 & 0&0 & 0},
437 [KEYWORD_exception] = {0 & 0&0&0&1 & 0&1 & 1},
438 [KEYWORD_extension] = {0 & 0&0&0&0 & 0&0 & 0},
439 [KEYWORD_external] = {0 & 0&1&1&1 & 0&0 & 0},
440 [KEYWORD_for] = {1 & 1&1&1&1 & 1&1 & 1},
441 [KEYWORD_foreign] = {1 & 1&1&1&1 & 0&0 & 1},
442 [KEYWORD_from] = {1 & 1&1&1&1 & 1&1 & 1},
443 [KEYWORD_function] = {1 & 0&1&1&0 & 0&1 & 0},
444 [KEYWORD_go] = {0 & 0&0&0&1 & 0&0 & 0},
445 [KEYWORD_handler] = {0 & 0&0&0&0 & 0&0 & 0},
446 [KEYWORD_if] = {1 & 0&0&0&0 & 0&1 & 1},
447 [KEYWORD_index] = {1 & 0&0&0&0 & 1&1 & 1},
448 [KEYWORD_inquiry_directive] = { 0 },
449 [KEYWORD_internal] = {1 & 0&1&1&0 & 0&0 & 0},
450 [KEYWORD_is] = {0, SqlReservedWordPredicatorForIsOrAs},
451 [KEYWORD_language] = { 0 },
452 [KEYWORD_local] = {0 & 0&1&1&1 & 0&0 & 0},
453 [KEYWORD_loop] = {1 & 1&1&1&1 & 0&1 & 0},
454 [KEYWORD_ml_conn] = {0 & 0&0&0&0 & 0&0 & 0},
455 [KEYWORD_ml_conn_dnet] = {0 & 0&0&0&0 & 0&0 & 0},
456 [KEYWORD_ml_table_dnet] = {0 & 0&0&0&0 & 0&0 & 0},
457 [KEYWORD_ml_conn_java] = {0 & 0&0&0&0 & 0&0 & 0},
458 [KEYWORD_ml_table_java] = {0 & 0&0&0&0 & 0&0 & 0},
459 [KEYWORD_ml_conn_chk] = {0 & 0&0&0&0 & 0&0 & 0},
460 [KEYWORD_ml_conn_lang] = {0 & 0&0&0&0 & 0&0 & 0},
461 [KEYWORD_ml_table_lang] = {0 & 0&0&0&0 & 0&0 & 0},
462 [KEYWORD_ml_table_chk] = {0 & 0&0&0&0 & 0&0 & 0},
463 [KEYWORD_ml_prop] = {0 & 0&0&0&0 & 0&0 & 0},
464 [KEYWORD_ml_table] = {0 & 0&0&0&0 & 0&0 & 0},
465 [KEYWORD_object] = {0 & 0&0&0&0 & 0&0 & 0},
466 [KEYWORD_on] = {1 & 1&1&1&1 & 1&1 & 1},
467 [KEYWORD_package] = {0 & 0&0&0&0 & 0&1 & 0},
468 [KEYWORD_pragma] = {0 & 0&0&0&0 & 0&1 & 0},
469 [KEYWORD_primary] = {1 & 1&1&1&1 & 0&0 & 1},
470 [KEYWORD_procedure] = {1 & 0&0&0&0 & 0&1 & 1},
471 [KEYWORD_publication] = {0 & 0&0&0&0 & 0&0 & 1},
472 [KEYWORD_record] = {0 & 0&0&0&0 & 0&1 & 0},
473 [KEYWORD_ref] = {0 & 0&1&1&0 & 0&0 & 0},
474 [KEYWORD_references] = {1 & 1&1&1&1 & 0&0 & 1},
475 [KEYWORD_rem] = {0 & 0&0&0&0 & 0&0 & 0},
476 [KEYWORD_result] = {0 & 0&1&1&0 & 0&0 & 0},
477 [KEYWORD_return] = {1 & 0&1&1&0 & 0&1 & 1},
478 [KEYWORD_returns] = {0 & 0&0&0&0 & 0&0 & 0},
479 [KEYWORD_schema] = {0 & 0&0&0&0 & 0&0 & 0},
480 [KEYWORD_select] = {1 & 1&1&1&1 & 1&1 & 1},
481 [KEYWORD_service] = {0 & 0&0&0&0 & 0&0 & 0},
482 [KEYWORD_subtype] = {0 & 0&0&0&0 & 0&1 & 0},
483 [KEYWORD_synonym] = {0 & 0&0&0&0 & 1&0 & 0},
484 [KEYWORD_table] = {1 & 1&1&1&1 & 1&1 & 1},
485 [KEYWORD_temporary] = {0 & 0&0&0&1 & 0&0 & 1},
486 [KEYWORD_then] = {1 & 1&1&1&1 & 1&1 & 1},
487 [KEYWORD_trigger] = {1 & 0&1&1&0 & 1&0 & 1},
488 [KEYWORD_type] = {0 & 0&0&0&0 & 0&1 & 0},
489 [KEYWORD_unique] = {1 & 1&1&1&1 & 1&1 & 1},
490 [KEYWORD_url] = {0 & 0&0&0&0 & 0&0 & 0},
491 [KEYWORD_variable] = {0 & 0&0&0&0 & 0&0 & 1},
492 [KEYWORD_view] = {0 & 0&0&0&1 & 1&1 & 1},
493 [KEYWORD_when] = {1 & 1&1&1&1 & 0&1 & 1},
494 [KEYWORD_while] = {1 & 0&0&0&0 & 0&1 & 1},
495 [KEYWORD_with] = {1 & 1&1&1&1 & 1&1 & 1},
496 [KEYWORD_without] = {0 & 0&1&1&0 & 0&0 & 0},
497 };
498
499 /*
500 * FUNCTION DECLARATIONS
501 */
502
503 /* Recursive calls */
504 static void parseBlock (tokenInfo *const token, const bool local);
505 static void parseBlockFull (tokenInfo *const token, const bool local, langType lang);
506 static void parseDeclare (tokenInfo *const token, const bool local);
507 static void parseKeywords (tokenInfo *const token);
508 static tokenType parseSqlFile (tokenInfo *const token);
509
510 /*
511 * FUNCTION DEFINITIONS
512 */
513
514 static bool SqlReservedWordPredicatorForIsOrAs (tokenInfo *const token)
515 {
516 if (strcasecmp ("as", vStringValue (token->string)) == 0)
517 return (bool) (1 & 1&1&1&1 & 1&1 & 1);
518 else /* for "is" */
519 return (bool) (1 & 0&1&1&1 & 1&1 & 1);
520 	/* PostgreSQL allows "is" to be used as a function name. */
521 }
522
523 static bool isCmdTerm (tokenInfo *const token)
524 {
525 DebugStatement (
526 debugPrintf (DEBUG_PARSE
527 , "\n isCmdTerm: token same tt:%d tk:%d\n"
528 , token->type
529 , token->keyword
530 );
531 );
532
533 /*
534 	 * Based on the various customer sites I have been at,
535 	 * the most common command delimiters are:
536 * ;
537 * ~
538 * /
539 * go
540 	 * This routine will check for any of these; more
541 	 * can easily be added by modifying readToken and
542 	 * either adding the character to:
543 	 *     enum eTokenType
544 	 * or adding the keyword (such as "go") to SqlKeywordTable.
545 */
546 return (isType (token, TOKEN_SEMICOLON) ||
547 isType (token, TOKEN_TILDE) ||
548 isType (token, TOKEN_FORWARD_SLASH) ||
549 isKeyword (token, KEYWORD_go));
550 }
551
552 static bool isMatchedEnd(tokenInfo *const token, int nest_lvl)
553 {
554 bool terminated = false;
555 /*
556 * Since different forms of SQL allow the use of
557 * BEGIN
558 * ...
559 * END
560 * blocks, some statements may not be terminated using
561 * the standard delimiters:
562 * ;
563 * ~
564 * /
565 * go
566 	 * This routine will check to see if we encounter an END
567 	 * for the matching nest level of BEGIN ... END statements.
568 	 * If we find one, then we can assume the statement was terminated
569 * since we have fallen through to the END statement of the BEGIN
570 * block.
571 */
572 if ( nest_lvl > 0 && isKeyword (token, KEYWORD_end) )
573 {
574 if ( token->begin_end_nest_lvl == nest_lvl )
575 terminated = true;
576 }
577
578 return terminated;
579 }
580
581 static tokenInfo *newToken (void)
582 {
583 tokenInfo *const token = xMalloc (1, tokenInfo);
584
585 token->type = TOKEN_UNDEFINED;
586 token->keyword = KEYWORD_NONE;
587 token->string = vStringNew ();
588 token->scope = vStringNew ();
589 token->scopeKind = SQLTAG_COUNT;
590 token->begin_end_nest_lvl = 0;
591 token->lineNumber = getInputLineNumber ();
592 token->filePosition = getInputFilePosition ();
593 token->promise = -1;
594
595 return token;
596 }
597
598 static void deleteToken (tokenInfo *const token)
599 {
600 vStringDelete (token->string);
601 vStringDelete (token->scope);
602 eFree (token);
603 }
604
605 /*
606 * Tag generation functions
607 */
608
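/*
 * A sketch of how scope affects the emitted tags (reusing the demo_pkg
 * example from the parseSubProgram comment further below): with token->scope
 * set to "demo_pkg" and the token string "test_func", makeSqlTag emits
 * "test_func" with scope information and, when qualified tags are enabled,
 * an additional "demo_pkg.test_func" entry.
 */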
609 static void makeSqlTag (tokenInfo *const token, const sqlKind kind)
610 {
611 if (SqlKinds [kind].enabled)
612 {
613 const char *const name = vStringValue (token->string);
614 tagEntryInfo e;
615 initTagEntry (&e, name, kind);
616
617 e.lineNumber = token->lineNumber;
618 e.filePosition = token->filePosition;
619
620 if (vStringLength (token->scope) > 0)
621 {
622 Assert (token->scopeKind < SQLTAG_COUNT);
623 e.extensionFields.scopeKindIndex = token->scopeKind;
624 e.extensionFields.scopeName = vStringValue (token->scope);
625
626 if (isXtagEnabled (XTAG_QUALIFIED_TAGS))
627 {
628 vString *fulltag;
629 tagEntryInfo xe = e;
630
631 fulltag = vStringNewCopy (token->scope);
632 vStringPut (fulltag, '.');
633 vStringCat (fulltag, token->string);
634 xe.name = vStringValue (fulltag);
635 markTagExtraBit (&xe, XTAG_QUALIFIED_TAGS);
636 makeTagEntry (&xe);
637 vStringDelete (fulltag);
638 }
639 }
640
641 makeTagEntry (&e);
642 }
643 }
644
645 /*
646 * Parsing functions
647 */
648
649 static void parseString (vString *const string, const int delimiter, int *promise)
650 {
651 int offset[2];
652 unsigned long linenum[3];
653 enum { START, END, SOURCE };
654
655 int c0;
656
657 if (promise && !isXtagEnabled(XTAG_GUEST))
658 promise = NULL;
659
660 if (promise)
661 {
662 c0 = getcFromInputFile ();
663 linenum[START] = getInputLineNumber ();
664 offset[START] = getInputLineOffset ();
665 linenum[SOURCE] = getSourceLineNumber ();
666 ungetcToInputFile(c0);
667 }
668
669 bool end = false;
670 while (! end)
671 {
672 int c = getcFromInputFile ();
673 if (c == EOF)
674 end = true;
675 /*
676 else if (c == '\\')
677 {
678 c = getcFromInputFile(); // This maybe a ' or ". //
679 vStringPut(string, c);
680 }
681 */
682 else if (c == delimiter)
683 {
684 if (promise)
685 {
686 ungetcToInputFile(c);
687 linenum[END] = getInputLineNumber ();
688 offset[END] = getInputLineOffset ();
689 (void)getcFromInputFile ();
690 *promise = makePromise (NULL,
691 linenum [START], offset [START],
692 linenum [END], offset [END],
693 linenum [SOURCE]);
694 }
695 end = true;
696 }
697 else
698 vStringPut (string, c);
699 }
700 }
701
702 /* Read an identifier beginning with "firstChar" and place it into "string".
703 */
704 static void parseIdentifier (vString *const string, const int firstChar)
705 {
706 int c = firstChar;
707 Assert (isIdentChar1 (c));
708 do
709 {
710 vStringPut (string, c);
711 c = getcFromInputFile ();
712 } while (isIdentChar (c));
713 if (!isspace (c))
714 ungetcToInputFile (c); /* unget non-identifier character */
715 }
716
717 static bool isCCFlag(const char *str)
718 {
719 return (anyKindEntryInScope(CORK_NIL, str, SQLTAG_PLSQL_CCFLAGS, false) != 0);
720 }
721
722 /* Parse a PostgreSQL: dollar-quoted string
723 * https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
724 *
725  * The syntax for dollar-quoted strings can collide with PL/SQL inquiry directives ($$name).
726 * https://docs.oracle.com/en/database/oracle/oracle-database/18/lnpls/plsql-language-fundamentals.html#GUID-E918087C-D5A8-4CEE-841B-5333DE6D4C15
727 * https://github.com/universal-ctags/ctags/issues/3006
728 */
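/* Rough examples of the inputs this function has to tell apart (see the links
 * above; the tag and the statements are only illustrative):
 *
 *   $$ SELECT 1; $$           -- dollar-quoted string, empty tag  -> TOKEN_STRING
 *   $body$ SELECT 1; $body$   -- dollar-quoted string, tagged     -> TOKEN_STRING
 *   $$PLSQL_UNIT              -- PL/SQL inquiry directive         -> TOKEN_UNDEFINED
 */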
729 static tokenType parseDollarQuote (vString *const string, const int delimiter, int *promise)
730 {
731 int offset[2];
732 unsigned long linenum[3];
733 enum { START, END, SOURCE };
734
735 unsigned int len = 0;
736 char tag[32 /* arbitrary limit */] = {0};
737 int c = 0;
738
739 /* read the tag */
740 tag[len++] = (char) delimiter;
741 while ((len + 1) < sizeof tag && c != delimiter)
742 {
743 c = getcFromInputFile ();
744 if (isIdentChar(c))
745 tag[len++] = (char) c;
746 else
747 break;
748 }
749 tag[len] = 0;
750
751 bool empty_tag = (len == 2);
752
753 if (c != delimiter)
754 {
755 /* damn that's not valid, what can we do? */
756 ungetcToInputFile (c);
757 return TOKEN_UNDEFINED;
758 }
759
760 if (promise && !isXtagEnabled(XTAG_GUEST))
761 promise = NULL;
762
763 if (promise)
764 {
765 linenum[START] = getInputLineNumber ();
766 offset[START] = getInputLineOffset ();
767 linenum[SOURCE] = getSourceLineNumber ();
768 }
769
770 /* and read the content (until a matching end tag) */
771 while ((c = getcFromInputFile ()) != EOF)
772 {
773 if (c != delimiter)
774 {
775 vStringPut (string, c);
776 if (empty_tag
777 && (KEYWORD_inquiry_directive == lookupCaseKeyword (vStringValue (string),
778 Lang_sql)
779 || isCCFlag(vStringValue (string))))
780 {
781 /* PL/SQL inquiry directives */
782 int c0 = getcFromInputFile ();
783
784 if (c0 != delimiter && (isalnum(c0) || c0 == '_'))
785 {
786 vStringPut (string, c0);
787 continue;
788 }
789
790 ungetcToInputFile (c0);
791 /* Oracle PL/SQL's inquiry directive ($$name) */
792 return TOKEN_UNDEFINED;
793 }
794 }
795 else
796 {
797 char *end_p = tag;
798
799 while (c != EOF && *end_p && ((int) c) == *end_p)
800 {
801 c = getcFromInputFile ();
802 end_p++;
803 }
804
805 if (c != EOF)
806 ungetcToInputFile (c);
807
808 if (! *end_p) /* full tag match */
809 {
810 if (promise)
811 {
812 linenum[END] = getInputLineNumber ();
813 offset[END] = getInputLineOffset ();
814 if (offset[END] > len)
815 offset[END] -= len;
816 *promise = makePromise (NULL,
817 linenum [START], offset [START],
818 linenum [END], offset [END],
819 linenum [SOURCE]);
820 }
821 break;
822 }
823 else
824 vStringNCatS (string, tag, (size_t) (end_p - tag));
825 }
826 }
827
828 return TOKEN_STRING;
829 }
830
831 static void readToken (tokenInfo *const token)
832 {
833 int c;
834
835 token->type = TOKEN_UNDEFINED;
836 token->keyword = KEYWORD_NONE;
837 vStringClear (token->string);
838 token->promise = -1;
839
840 getNextChar:
841 do
842 {
843 c = getcFromInputFile ();
844 token->lineNumber = getInputLineNumber ();
845 token->filePosition = getInputFilePosition ();
846 /*
847 * Added " to the list of ignores, not sure what this
848 * might break but it gets by this issue:
849 * create table "t1" (...)
850 *
851 * Darren, the code passes all my tests for both
852 * Oracle and SQL Anywhere, but maybe you can tell me
853 		 * what this may affect.
854 */
855 }
856 while (c == '\t' || c == ' ' || c == '\n');
857
858 switch (c)
859 {
860 case EOF: token->type = TOKEN_EOF; break;
861 case '(': token->type = TOKEN_OPEN_PAREN; break;
862 case ')': token->type = TOKEN_CLOSE_PAREN; break;
863 case ':': token->type = TOKEN_COLON; break;
864 case ';': token->type = TOKEN_SEMICOLON; break;
865 case '.': token->type = TOKEN_PERIOD; break;
866 case ',': token->type = TOKEN_COMMA; break;
867 case '{': token->type = TOKEN_OPEN_CURLY; break;
868 case '}': token->type = TOKEN_CLOSE_CURLY; break;
869 case '~': token->type = TOKEN_TILDE; break;
870 case '[': token->type = TOKEN_OPEN_SQUARE; break;
871 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
872 case '=': token->type = TOKEN_EQUAL; break;
873
874 case '\'':
875 case '"':
876 token->type = TOKEN_STRING;
877 parseString (token->string, c, &token->promise);
878 token->lineNumber = getInputLineNumber ();
879 token->filePosition = getInputFilePosition ();
880 break;
881
882 case '#':
883 skipToCharacterInInputFile ('\n');
884 goto getNextChar;
885 case '-':
886 c = getcFromInputFile ();
887 if (c == '-') /* -- is this the start of a comment? */
888 {
889 skipToCharacterInInputFile ('\n');
890 goto getNextChar;
891 }
892 else
893 {
894 if (!isspace (c))
895 ungetcToInputFile (c);
896 token->type = TOKEN_OPERATOR;
897 }
898 break;
899
900 case '<':
901 case '>':
902 {
903 const int initial = c;
904 int d = getcFromInputFile ();
905 if (d == initial)
906 {
907 if (initial == '<')
908 token->type = TOKEN_BLOCK_LABEL_BEGIN;
909 else
910 token->type = TOKEN_BLOCK_LABEL_END;
911 }
912 else
913 {
914 ungetcToInputFile (d);
915 token->type = TOKEN_UNDEFINED;
916 }
917 break;
918 }
919
920 case '\\':
921 c = getcFromInputFile ();
922 if (c != '\\' && c != '"' && c != '\'' && !isspace (c))
923 ungetcToInputFile (c);
924 token->type = TOKEN_CHARACTER;
925 token->lineNumber = getInputLineNumber ();
926 token->filePosition = getInputFilePosition ();
927 break;
928
929 case '/':
930 {
931 int d = getcFromInputFile ();
932 if ((d != '*') && /* is this the start of a comment? */
933 (d != '/')) /* is a one line comment? */
934 {
935 token->type = TOKEN_FORWARD_SLASH;
936 ungetcToInputFile (d);
937 }
938 else
939 {
940 if (d == '*')
941 {
942 skipToCharacterInInputFile2('*', '/');
943 goto getNextChar;
944 }
945 else if (d == '/') /* is this the start of a comment? */
946 {
947 skipToCharacterInInputFile ('\n');
948 goto getNextChar;
949 }
950 }
951 break;
952 }
953
954 case '$':
955 token->type = parseDollarQuote (token->string, c, &token->promise);
956 token->lineNumber = getInputLineNumber ();
957 token->filePosition = getInputFilePosition ();
958 break;
959
960 default:
961 if (! isIdentChar1 (c))
962 token->type = TOKEN_UNDEFINED;
963 else
964 {
965 parseIdentifier (token->string, c);
966 token->lineNumber = getInputLineNumber ();
967 token->filePosition = getInputFilePosition ();
968 token->keyword = lookupCaseKeyword (vStringValue (token->string), Lang_sql);
969 if (isKeyword (token, KEYWORD_rem))
970 {
971 vStringClear (token->string);
972 skipToCharacterInInputFile ('\n');
973 goto getNextChar;
974 }
975 else if (isKeyword (token, KEYWORD_NONE))
976 token->type = TOKEN_IDENTIFIER;
977 else
978 token->type = TOKEN_KEYWORD;
979 }
980 break;
981 }
982 }
983
984 /*
985 * reads an identifier, possibly quoted:
986 * identifier
987 * "identifier"
988 * [identifier]
989 */
990 static void readIdentifier (tokenInfo *const token)
991 {
992 readToken (token);
993 if (isType (token, TOKEN_OPEN_SQUARE))
994 {
995 tokenInfo *const close_square = newToken ();
996
997 readToken (token);
998 /* eat close square */
999 readToken (close_square);
1000 deleteToken (close_square);
1001 }
1002 }
1003
1004 /*
1005 * Token parsing functions
1006 */
1007
1008 /*
1009 * static void addContext (tokenInfo* const parent, const tokenInfo* const child)
1010 * {
1011 * if (vStringLength (parent->string) > 0)
1012 * {
1013 * vStringPut (parent->string, '.');
1014 * }
1015 * vStringCat (parent->string, child->string);
1016 * }
1017 */
1018
1019 static void addToScope (tokenInfo* const token, vString* const extra, sqlKind kind)
1020 {
1021 if (vStringLength (token->scope) > 0)
1022 {
1023 vStringPut (token->scope, '.');
1024 }
1025 vStringCat (token->scope, extra);
1026 token->scopeKind = kind;
1027 }
1028
1029 /*
1030 * Scanning functions
1031 */
1032
1033 static bool isOneOfKeyword (tokenInfo *const token, const keywordId *const keywords, unsigned int count)
1034 {
1035 unsigned int i;
1036 for (i = 0; i < count; i++)
1037 {
1038 if (isKeyword (token, keywords[i]))
1039 return true;
1040 }
1041 return false;
1042 }
1043
1044 static void findTokenOrKeywords (tokenInfo *const token, const tokenType type,
1045 const keywordId *const keywords,
1046 unsigned int kcount)
1047 {
1048 while (! isType (token, type) &&
1049 ! (isType (token, TOKEN_KEYWORD) && isOneOfKeyword (token, keywords, kcount)) &&
1050 ! isType (token, TOKEN_EOF))
1051 {
1052 readToken (token);
1053 }
1054 }
1055
1056 static void findToken (tokenInfo *const token, const tokenType type)
1057 {
1058 while (! isType (token, type) &&
1059 ! isType (token, TOKEN_EOF))
1060 {
1061 readToken (token);
1062 }
1063 }
1064
1065 static void findCmdTerm (tokenInfo *const token, const bool check_first)
1066 {
1067 int begin_end_nest_lvl = token->begin_end_nest_lvl;
1068
1069 if (check_first)
1070 {
1071 if (isCmdTerm(token))
1072 return;
1073 }
1074 do
1075 {
1076 readToken (token);
1077 } while (! isCmdTerm(token) &&
1078 ! isMatchedEnd(token, begin_end_nest_lvl) &&
1079 ! isType (token, TOKEN_EOF));
1080 }
1081
1082 static void skipToMatched(tokenInfo *const token)
1083 {
1084 int nest_level = 0;
1085 tokenType open_token;
1086 tokenType close_token;
1087
1088 switch (token->type)
1089 {
1090 case TOKEN_OPEN_PAREN:
1091 open_token = TOKEN_OPEN_PAREN;
1092 close_token = TOKEN_CLOSE_PAREN;
1093 break;
1094 case TOKEN_OPEN_CURLY:
1095 open_token = TOKEN_OPEN_CURLY;
1096 close_token = TOKEN_CLOSE_CURLY;
1097 break;
1098 case TOKEN_OPEN_SQUARE:
1099 open_token = TOKEN_OPEN_SQUARE;
1100 close_token = TOKEN_CLOSE_SQUARE;
1101 break;
1102 default:
1103 return;
1104 }
1105
1106 /*
1107 * This routine will skip to a matching closing token.
1108 * It will also handle nested tokens like the (, ) below.
1109 * ( name varchar(30), text binary(10) )
1110 */
1111
1112 if (isType (token, open_token))
1113 {
1114 nest_level++;
1115 while (nest_level > 0 && !isType (token, TOKEN_EOF))
1116 {
1117 readToken (token);
1118 if (isType (token, open_token))
1119 {
1120 nest_level++;
1121 }
1122 if (isType (token, close_token))
1123 {
1124 if (nest_level > 0)
1125 {
1126 nest_level--;
1127 }
1128 }
1129 }
1130 readToken (token);
1131 }
1132 }
1133
1134 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
1135 {
1136 dest->lineNumber = src->lineNumber;
1137 dest->filePosition = src->filePosition;
1138 dest->type = src->type;
1139 dest->keyword = src->keyword;
1140 vStringCopy(dest->string, src->string);
1141 vStringCopy(dest->scope, src->scope);
1142 dest->scopeKind = src->scopeKind;
1143 }
1144
1145 static void skipArgumentList (tokenInfo *const token)
1146 {
1147 /*
1148 * Other databases can have arguments with fully declared
1149 * datatypes:
1150 * ( name varchar(30), text binary(10) )
1151 * So we must check for nested open and closing parentheses
1152 */
1153
1154 if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */
1155 {
1156 skipToMatched (token);
1157 }
1158 }
1159
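/*
 * A hedged sketch of the mapping performed below: PostgreSQL procedural
 * language names such as "plperl" or "plpythonu" have the leading "pl" (and,
 * for untrusted variants, a trailing "u") stripped before asking ctags for a
 * parser, yielding "perl" and "python" respectively.
 */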
1160 static langType getNamedLanguageFromToken(tokenInfo *const token)
1161 {
1162 langType lang = LANG_IGNORE;
1163
1164 if (isType (token, TOKEN_IDENTIFIER))
1165 {
1166 if (vStringLength (token->string) > 2
1167 && vStringValue (token->string) [0] == 'p'
1168 && vStringValue (token->string) [1] == 'l')
1169 {
1170 			/* Remove the leading 'pl' and a trailing 'u' to extract the
1171 			 * name of the language. */
1172 bool unsafe = (vStringLast(token->string) == 'u');
1173 lang = getNamedLanguageOrAlias (vStringValue (token->string) + 2,
1174 vStringLength (token->string)
1175 - 2
1176 - (unsafe? 1: 0));
1177 }
1178 }
1179 return lang;
1180 }
1181
1182 static void parseSubProgram (tokenInfo *const token)
1183 {
1184 tokenInfo *const name = newToken ();
1185 vString * saveScope = vStringNew ();
1186 sqlKind saveScopeKind;
1187
1188 /*
1189 * This must handle both prototypes and the body of
1190 * the procedures.
1191 *
1192 * Prototype:
1193 * FUNCTION func_name RETURN integer;
1194 * PROCEDURE proc_name( parameters );
1195 * Procedure
1196 * FUNCTION GET_ML_USERNAME RETURN VARCHAR2
1197 * IS
1198 * BEGIN
1199 * RETURN v_sync_user_id;
1200 * END GET_ML_USERNAME;
1201 *
1202 * PROCEDURE proc_name( parameters )
1203 * IS
1204 * BEGIN
1205 * END;
1206 * CREATE PROCEDURE proc_name( parameters )
1207 * EXTERNAL NAME ... ;
1208 * CREATE PROCEDURE proc_name( parameters )
1209 * BEGIN
1210 * END;
1211 *
1212 * CREATE FUNCTION f_GetClassName(
1213 * IN @object VARCHAR(128)
1214 * ,IN @code VARCHAR(128)
1215 * )
1216 * RETURNS VARCHAR(200)
1217 * DETERMINISTIC
1218 * BEGIN
1219 *
1220 * IF( @object = 'user_state' ) THEN
1221 * SET something = something;
1222 * END IF;
1223 *
1224 * RETURN @name;
1225 * END;
1226 *
1227 * Note, a Package adds scope to the items within.
1228 * create or replace package demo_pkg is
1229 * test_var number;
1230 * function test_func return varchar2;
1231 * function more.test_func2 return varchar2;
1232 * end demo_pkg;
1233 * So the tags generated here, contain the package name:
1234 * demo_pkg.test_var
1235 * demo_pkg.test_func
1236 * demo_pkg.more.test_func2
1237 */
1238 const sqlKind kind = isKeyword (token, KEYWORD_function) ?
1239 SQLTAG_FUNCTION : SQLTAG_PROCEDURE;
1240 Assert (isKeyword (token, KEYWORD_function) ||
1241 isKeyword (token, KEYWORD_procedure));
1242
1243 vStringCopy(saveScope, token->scope);
1244 saveScopeKind = token->scopeKind;
1245 readToken (token);
1246 copyToken (name, token);
1247 readToken (token);
1248
1249 if (isType (token, TOKEN_PERIOD))
1250 {
1251 /*
1252 * If this is an Oracle package, then the token->scope should
1253 * already be set. If this is the case, also add this value to the
1254 * scope.
1255 * If this is not an Oracle package, chances are the scope should be
1256 * blank and the value just read is the OWNER or CREATOR of the
1257 * function and should not be considered part of the scope.
1258 */
1259 if (vStringLength(saveScope) > 0)
1260 {
1261 addToScope(token, name->string, kind);
1262 }
1263 readToken (token);
1264 copyToken (name, token);
1265 readToken (token);
1266 }
1267 if (isType (token, TOKEN_OPEN_PAREN))
1268 {
1269 /* Reads to the next token after the TOKEN_CLOSE_PAREN */
1270 skipArgumentList(token);
1271 }
1272
1273 if (kind == SQLTAG_FUNCTION)
1274 {
1275 if (isKeyword (token, KEYWORD_return) ||
1276 isKeyword (token, KEYWORD_returns))
1277 {
1278 /* Read datatype */
1279 readToken (token);
1280 /*
1281 * Read token after which could be the
1282 * command terminator if a prototype
1283 * or an open parenthesis
1284 */
1285 readToken (token);
1286 if (isType (token, TOKEN_OPEN_PAREN))
1287 {
1288 /* Reads to the next token after the TOKEN_CLOSE_PAREN */
1289 skipArgumentList(token);
1290 }
1291 }
1292 }
1293 if (isCmdTerm (token))
1294 {
1295 makeSqlTag (name, SQLTAG_PROTOTYPE);
1296 }
1297 else
1298 {
1299 langType lang = LANG_IGNORE;
1300
1301 while (! isKeyword (token, KEYWORD_is) &&
1302 ! isKeyword (token, KEYWORD_begin) &&
1303 ! isKeyword (token, KEYWORD_at) &&
1304 ! isKeyword (token, KEYWORD_internal) &&
1305 ! isKeyword (token, KEYWORD_external) &&
1306 ! isKeyword (token, KEYWORD_url) &&
1307 ! isType (token, TOKEN_EQUAL) &&
1308 ! isType (token, TOKEN_EOF) &&
1309 ! isCmdTerm (token))
1310 {
1311 if (isKeyword (token, KEYWORD_result))
1312 {
1313 readToken (token);
1314 if (isType (token, TOKEN_OPEN_PAREN))
1315 {
1316 /* Reads to the next token after the TOKEN_CLOSE_PAREN */
1317 skipArgumentList(token);
1318 }
1319 } else if (lang == LANG_IGNORE
1320 && isKeyword (token, KEYWORD_language)) {
1321 readToken (token);
1322 lang = getNamedLanguageFromToken (token);
1323 if (lang != LANG_IGNORE)
1324 readToken (token);
1325 } else {
1326 readToken (token);
1327 }
1328 }
1329 if (isKeyword (token, KEYWORD_at) ||
1330 isKeyword (token, KEYWORD_url) ||
1331 isKeyword (token, KEYWORD_internal) ||
1332 isKeyword (token, KEYWORD_external))
1333 {
1334 addToScope(token, name->string, kind);
1335 if (isType (name, TOKEN_IDENTIFIER) ||
1336 isType (name, TOKEN_STRING) ||
1337 isType (name, TOKEN_KEYWORD))
1338 {
1339 makeSqlTag (name, kind);
1340 }
1341
1342 vStringClear (token->scope);
1343 token->scopeKind = SQLTAG_COUNT;
1344 }
1345 if (isType (token, TOKEN_EQUAL))
1346 readToken (token);
1347
1348 if (isKeyword (token, KEYWORD_declare))
1349 parseDeclare (token, false);
1350
1351 if (isKeyword (token, KEYWORD_is) ||
1352 isKeyword (token, KEYWORD_begin))
1353 {
1354 addToScope(token, name->string, kind);
1355 if (isType (name, TOKEN_IDENTIFIER) ||
1356 isType (name, TOKEN_STRING) ||
1357 isType (name, TOKEN_KEYWORD))
1358 {
1359 makeSqlTag (name, kind);
1360 }
1361
1362 parseBlockFull (token, true, lang);
1363 vStringClear (token->scope);
1364 token->scopeKind = SQLTAG_COUNT;
1365 }
1366 }
1367 vStringCopy(token->scope, saveScope);
1368 token->scopeKind = saveScopeKind;
1369 deleteToken (name);
1370 vStringDelete(saveScope);
1371 }
1372
1373 static void parseRecord (tokenInfo *const token)
1374 {
1375 /*
1376 * Make it a bit forgiving, this is called from
1377 * multiple functions, parseTable, parseType
1378 */
1379 if (!isType (token, TOKEN_OPEN_PAREN))
1380 readToken (token);
1381 if (!isType (token, TOKEN_OPEN_PAREN))
1382 return;
1383
1384 do
1385 {
1386 if (isType (token, TOKEN_COMMA) ||
1387 isType (token, TOKEN_OPEN_PAREN))
1388 {
1389 readToken (token);
1390 }
1391
1392 /*
1393 * Create table statements can end with various constraints
1394 * which must be excluded from the SQLTAG_FIELD.
1395 * create table t1 (
1396 * c1 integer,
1397 * c2 char(30),
1398 * c3 numeric(10,5),
1399 * c4 integer,
1400 * constraint whatever,
1401 * primary key(c1),
1402 * foreign key (),
1403 * check ()
1404 * )
1405 */
1406 if (! isKeyword(token, KEYWORD_primary) &&
1407 ! isKeyword(token, KEYWORD_references) &&
1408 ! isKeyword(token, KEYWORD_unique) &&
1409 ! isKeyword(token, KEYWORD_check) &&
1410 ! isKeyword(token, KEYWORD_constraint) &&
1411 ! isKeyword(token, KEYWORD_foreign))
1412 {
1413 /* keyword test above is redundant as only a TOKEN_KEYWORD could
1414 * match any isKeyword() anyway */
1415 if (isType (token, TOKEN_IDENTIFIER) ||
1416 isType (token, TOKEN_STRING) ||
1417 (isType (token, TOKEN_KEYWORD)
1418 && (!isReservedWord (token))))
1419 {
1420 makeSqlTag (token, SQLTAG_FIELD);
1421 }
1422 }
1423
1424 while (! isType (token, TOKEN_COMMA) &&
1425 ! isType (token, TOKEN_CLOSE_PAREN) &&
1426 ! isType (token, TOKEN_OPEN_PAREN) &&
1427 ! isType (token, TOKEN_EOF))
1428 {
1429 readToken (token);
1430 /*
1431 * A table structure can look like this:
1432 * create table t1 (
1433 * c1 integer,
1434 * c2 char(30),
1435 * c3 numeric(10,5),
1436 * c4 integer
1437 * )
1438 * We can't just look for a COMMA or CLOSE_PAREN
1439 * since that will not deal with the numeric(10,5)
1440 * case. So we need to skip the argument list
1441 * when we find an open paren.
1442 */
1443 if (isType (token, TOKEN_OPEN_PAREN))
1444 {
1445 /* Reads to the next token after the TOKEN_CLOSE_PAREN */
1446 skipArgumentList(token);
1447 }
1448 }
1449 } while (! isType (token, TOKEN_CLOSE_PAREN) &&
1450 ! isType (token, TOKEN_EOF));
1451 }
1452
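/*
 * A simplified sketch of the PL/SQL declarations handled below (the type
 * names are invented for illustration):
 *
 *   TYPE t_rec IS RECORD (id NUMBER, name VARCHAR2(30));  -- record + fields
 *   TYPE t_tab IS TABLE OF NUMBER;                         -- table
 *   TYPE t_cur IS REF CURSOR;                              -- cursor
 */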
1453 static void parseType (tokenInfo *const token)
1454 {
1455 tokenInfo *const name = newToken ();
1456 vString * saveScope = vStringNew ();
1457 sqlKind saveScopeKind;
1458
1459 vStringCopy(saveScope, token->scope);
1460 /* If a scope has been set, add it to the name */
1461 addToScope (name, token->scope, token->scopeKind);
1462 saveScopeKind = token->scopeKind;
1463 readToken (name);
1464 if (isType (name, TOKEN_IDENTIFIER))
1465 {
1466 readToken (token);
1467 if (isKeyword (token, KEYWORD_is))
1468 {
1469 readToken (token);
1470 switch (token->keyword)
1471 {
1472 case KEYWORD_record:
1473 case KEYWORD_object:
1474 makeSqlTag (name, SQLTAG_RECORD);
1475 addToScope (token, name->string, SQLTAG_RECORD);
1476 parseRecord (token);
1477 break;
1478
1479 case KEYWORD_table:
1480 makeSqlTag (name, SQLTAG_TABLE);
1481 break;
1482
1483 case KEYWORD_ref:
1484 readToken (token);
1485 if (isKeyword (token, KEYWORD_cursor))
1486 makeSqlTag (name, SQLTAG_CURSOR);
1487 break;
1488
1489 default: break;
1490 }
1491 vStringClear (token->scope);
1492 token->scopeKind = SQLTAG_COUNT;
1493 }
1494 }
1495 vStringCopy(token->scope, saveScope);
1496 token->scopeKind = saveScopeKind;
1497 deleteToken (name);
1498 vStringDelete(saveScope);
1499 }
1500
1501 static void parseSimple (tokenInfo *const token, const sqlKind kind)
1502 {
1503 /* This will simply make the tagname from the first word found */
1504 readToken (token);
1505 if (isType (token, TOKEN_IDENTIFIER) ||
1506 isType (token, TOKEN_STRING))
1507 {
1508 makeSqlTag (token, kind);
1509 }
1510 }
1511
1512 static void parseDeclare (tokenInfo *const token, const bool local)
1513 {
1514 /*
1515 * PL/SQL declares are of this format:
1516 * IS|AS
1517 * [declare]
1518 * CURSOR curname ...
1519 * varname1 datatype;
1520 * varname2 datatype;
1521 * varname3 datatype;
1522 * begin
1523 */
1524
1525 if (isKeyword (token, KEYWORD_declare))
1526 readToken (token);
1527 while (! isKeyword (token, KEYWORD_begin) &&
1528 ! isKeyword (token, KEYWORD_end) &&
1529 ! isType (token, TOKEN_EOF))
1530 {
1531 keywordId stoppers [] = {
1532 KEYWORD_begin,
1533 KEYWORD_end,
1534 };
1535
1536 switch (token->keyword)
1537 {
1538 case KEYWORD_cursor: parseSimple (token, SQLTAG_CURSOR); break;
1539 case KEYWORD_function: parseSubProgram (token); break;
1540 case KEYWORD_procedure: parseSubProgram (token); break;
1541 case KEYWORD_subtype: parseSimple (token, SQLTAG_SUBTYPE); break;
1542 case KEYWORD_trigger: parseSimple (token, SQLTAG_TRIGGER); break;
1543 case KEYWORD_type: parseType (token); break;
1544
1545 default:
1546 if (isType (token, TOKEN_IDENTIFIER))
1547 {
1548 makeSqlTag (token, local? SQLTAG_LOCAL_VARIABLE: SQLTAG_VARIABLE);
1549 }
1550 break;
1551 }
1552 findTokenOrKeywords (token, TOKEN_SEMICOLON, stoppers, ARRAY_SIZE (stoppers));
1553 if (isType (token, TOKEN_SEMICOLON))
1554 readToken (token);
1555 }
1556 }
1557
1558 static void parseDeclareANSI (tokenInfo *const token, const bool local)
1559 {
1560 tokenInfo *const type = newToken ();
1561 /*
1562 * ANSI declares are of this format:
1563 * BEGIN
1564 * DECLARE varname1 datatype;
1565 * DECLARE varname2 datatype;
1566 * ...
1567 *
1568 	 * This differs from PL/SQL, where DECLARE precedes the BEGIN block
1569 * and the DECLARE keyword is not repeated.
1570 */
1571 while (isKeyword (token, KEYWORD_declare))
1572 {
1573 readToken (token);
1574 readToken (type);
1575
1576 if (isKeyword (type, KEYWORD_cursor))
1577 makeSqlTag (token, SQLTAG_CURSOR);
1578 else if (isKeyword (token, KEYWORD_local) &&
1579 isKeyword (type, KEYWORD_temporary))
1580 {
1581 /*
1582 * DECLARE LOCAL TEMPORARY TABLE table_name (
1583 * c1 int,
1584 * c2 int
1585 * );
1586 */
1587 readToken (token);
1588 if (isKeyword (token, KEYWORD_table))
1589 {
1590 readToken (token);
1591 if (isType(token, TOKEN_IDENTIFIER) ||
1592 isType(token, TOKEN_STRING))
1593 {
1594 makeSqlTag (token, SQLTAG_TABLE);
1595 }
1596 }
1597 }
1598 else if (isType (token, TOKEN_IDENTIFIER) ||
1599 isType (token, TOKEN_STRING))
1600 {
1601 makeSqlTag (token, local? SQLTAG_LOCAL_VARIABLE: SQLTAG_VARIABLE);
1602 }
1603 findToken (token, TOKEN_SEMICOLON);
1604 readToken (token);
1605 }
1606 deleteToken (type);
1607 }
1608
1609 static void parseLabel (tokenInfo *const token)
1610 {
1611 /*
1612 * A label has this format:
1613 * <<tobacco_dependency>>
1614 * DECLARE
1615 * v_senator VARCHAR2(100) := 'THURMOND, JESSE';
1616 * BEGIN
1617 * IF total_contributions (v_senator, 'TOBACCO') > 25000
1618 * THEN
1619 * <<alochol_dependency>>
1620 * DECLARE
1621 * v_senator VARCHAR2(100) := 'WHATEVERIT, TAKES';
1622 * BEGIN
1623 * ...
1624 */
1625
1626 Assert (isType (token, TOKEN_BLOCK_LABEL_BEGIN));
1627 readToken (token);
1628 if (isType (token, TOKEN_IDENTIFIER))
1629 {
1630 makeSqlTag (token, SQLTAG_BLOCK_LABEL);
1631 readToken (token); /* read end of label */
1632 }
1633 }
1634
1635 static void parseStatements (tokenInfo *const token, const bool exit_on_endif )
1636 {
1637 /* bool isAnsi = true; */
1638 bool stmtTerm = false;
1639 do
1640 {
1641
1642 if (isType (token, TOKEN_BLOCK_LABEL_BEGIN))
1643 parseLabel (token);
1644 else
1645 {
1646 switch (token->keyword)
1647 {
1648 case KEYWORD_exception:
1649 /*
1650 * EXCEPTION
1651 * <exception handler>;
1652 *
1653 * Where an exception handler could be:
1654 * BEGIN
1655 * WHEN OTHERS THEN
1656 * x := x + 3;
1657 * END;
1658 * In this case we need to skip this keyword and
1659 * move on to the next token without reading until
1660 * TOKEN_SEMICOLON;
1661 */
1662 readToken (token);
1663 continue;
1664
1665 case KEYWORD_when:
1666 /*
1667 * WHEN statements can be used in exception clauses
1668 				 * and CASE statements. CASE statements are handled
1669 				 * below, where we skip ahead to an END statement.
1670 * But for an exception clause, we can have:
1671 * EXCEPTION
1672 * WHEN OTHERS THEN
1673 * BEGIN
1674 * x := x + 3;
1675 * END;
1676 * If we skip to the TOKEN_SEMICOLON, we miss the begin
1677 * of a nested BEGIN END block. So read the next token
1678 * after the THEN and restart the LOOP.
1679 */
1680 while (! isKeyword (token, KEYWORD_then) &&
1681 ! isType (token, TOKEN_EOF))
1682 readToken (token);
1683
1684 readToken (token);
1685 continue;
1686
1687 case KEYWORD_if:
1688 /*
1689 * We do not want to look for a ; since for an empty
1690 * IF block, it would skip over the END.
1691 * IF...THEN
1692 * END IF;
1693 *
1694 * IF...THEN
1695 * ELSE
1696 * END IF;
1697 *
1698 * IF...THEN
1699 * ELSEIF...THEN
1700 * ELSE
1701 * END IF;
1702 *
1703 * or non-ANSI
1704 * IF ...
1705 * BEGIN
1706 * END
1707 */
1708 while (! isKeyword (token, KEYWORD_then) &&
1709 ! isKeyword (token, KEYWORD_begin) &&
1710 ! isType (token, TOKEN_EOF))
1711 {
1712 readToken (token);
1713 }
1714
1715 if (isKeyword (token, KEYWORD_begin))
1716 {
1717 /* isAnsi = false; */
1718 parseBlock(token, false);
1719
1720 /*
1721 * Handle the non-Ansi IF blocks.
1722 * parseBlock consumes the END, so if the next
1723 					 * token is a command terminator (like GO)
1724 * we know we are done with this statement.
1725 */
1726 if (isCmdTerm (token))
1727 stmtTerm = true;
1728 }
1729 else
1730 {
1731 readToken (token);
1732
1733 while (! isKeyword (token, KEYWORD_end) &&
1734 ! isKeyword (token, KEYWORD_endif) &&
1735 ! isType (token, TOKEN_EOF))
1736 {
1737 if (isKeyword (token, KEYWORD_else) ||
1738 isKeyword (token, KEYWORD_elseif))
1739 {
1740 readToken (token);
1741 }
1742
1743 parseStatements (token, true);
1744
1745 if (isCmdTerm(token))
1746 readToken (token);
1747
1748 }
1749
1750 /*
1751 * parseStatements returns when it finds an END, an IF
1752 * should follow the END for ANSI anyway.
1753 * IF...THEN
1754 * END IF;
1755 */
1756 if (isKeyword (token, KEYWORD_end))
1757 readToken (token);
1758
1759 if (isKeyword (token, KEYWORD_if) ||
1760 isKeyword (token, KEYWORD_endif))
1761 {
1762 readToken (token);
1763 if (isCmdTerm(token))
1764 stmtTerm = true;
1765 }
1766 else
1767 {
1768 /*
1769 * Well we need to do something here.
1770 * There are lots of different END statements
1771 * END;
1772 * END CASE;
1773 * ENDIF;
1774 * ENDCASE;
1775 */
1776 }
1777 }
1778 break;
1779
1780 case KEYWORD_loop:
1781 case KEYWORD_case:
1782 case KEYWORD_for:
1783 /*
1784 * LOOP...
1785 * END LOOP;
1786 *
1787 * CASE
1788 * WHEN '1' THEN
1789 * END CASE;
1790 *
1791 * FOR loop_name AS cursor_name CURSOR FOR ...
1792 * DO
1793 * END FOR;
1794 */
1795 if (isKeyword (token, KEYWORD_for))
1796 {
1797 /* loop name */
1798 readToken (token);
1799 /* AS */
1800 readToken (token);
1801
1802 while (! isKeyword (token, KEYWORD_is) &&
1803 ! isType (token, TOKEN_EOF))
1804 {
1805 /*
1806 						 * If this is not an AS keyword, this is
1807 						 * not a proper FOR statement and it should
1808 						 * simply be ignored.
1809 */
1810 return;
1811 }
1812
1813 while (! isKeyword (token, KEYWORD_do) &&
1814 ! isType (token, TOKEN_EOF))
1815 readToken (token);
1816 }
1817
1818
1819 readToken (token);
1820 while (! isKeyword (token, KEYWORD_end) &&
1821 ! isType (token, TOKEN_EOF))
1822 {
1823 /*
1824 if ( isKeyword (token, KEYWORD_else) ||
1825 isKeyword (token, KEYWORD_elseif) )
1826 readToken (token);
1827 */
1828
1829 parseStatements (token, false);
1830
1831 if (isCmdTerm(token))
1832 readToken (token);
1833 }
1834
1835
1836 if (isKeyword (token, KEYWORD_end ))
1837 readToken (token);
1838
1839 /*
1840 * Typically ended with
1841 * END LOOP [loop name];
1842 * END CASE
1843 * END FOR [loop name];
1844 */
1845 if (isKeyword (token, KEYWORD_loop) ||
1846 isKeyword (token, KEYWORD_case) ||
1847 isKeyword (token, KEYWORD_for))
1848 {
1849 readToken (token);
1850 }
1851
1852 if (isCmdTerm(token))
1853 stmtTerm = true;
1854
1855 break;
1856
1857 case KEYWORD_create:
1858 readToken (token);
1859 parseKeywords(token);
1860 break;
1861
1862 case KEYWORD_declare:
1863 case KEYWORD_begin:
1864 parseBlock (token, true);
1865 break;
1866
1867 case KEYWORD_end:
1868 break;
1869
1870 default:
1871 readToken (token);
1872 break;
1873 }
1874 /*
1875 * Not all statements must end in a semi-colon
1876 * begin
1877 * if current publisher <> 'publish' then
1878 * signal UE_FailStatement
1879 * end if
1880 * end;
1881 * The last statement prior to an end ("signal" above) does
1882 * not need a semi-colon, nor does the end if, since it is
1883 * also the last statement prior to the end of the block.
1884 *
1885 * So we must read to the first semi-colon or an END block
1886 */
1887 while (! stmtTerm &&
1888 ! isKeyword (token, KEYWORD_end) &&
1889 ! isCmdTerm(token) &&
1890 ! isType(token, TOKEN_EOF))
1891 {
1892 if (exit_on_endif && isKeyword (token, KEYWORD_endif))
1893 return;
1894
1895 if (isType (token, TOKEN_COLON) )
1896 {
1897 /*
1898 * A : can signal a loop name
1899 * myloop:
1900 * LOOP
1901 * LEAVE myloop;
1902 * END LOOP;
1903 * Unfortunately, labels do not have a
1904 * cmd terminator, therefore we have to check
1905 * if the next token is a keyword and process
1906 * it accordingly.
1907 */
1908 readToken (token);
1909 if (isKeyword (token, KEYWORD_loop) ||
1910 isKeyword (token, KEYWORD_while) ||
1911 isKeyword (token, KEYWORD_for))
1912 {
1913 /* parseStatements (token); */
1914 return;
1915 }
1916 }
1917
1918 readToken (token);
1919
1920 if (isType (token, TOKEN_OPEN_PAREN) ||
1921 isType (token, TOKEN_OPEN_CURLY) ||
1922 isType (token, TOKEN_OPEN_SQUARE))
1923 {
1924 skipToMatched (token);
1925 }
1926
1927 /*
1928 * Since we know how to parse various statements
1929 * if we detect them, parse them to completion
1930 */
1931 if (isType (token, TOKEN_BLOCK_LABEL_BEGIN) ||
1932 isKeyword (token, KEYWORD_exception) ||
1933 isKeyword (token, KEYWORD_loop) ||
1934 isKeyword (token, KEYWORD_case) ||
1935 isKeyword (token, KEYWORD_for) ||
1936 isKeyword (token, KEYWORD_begin))
1937 {
1938 parseStatements (token, false);
1939 }
1940 else if (isKeyword (token, KEYWORD_if))
1941 parseStatements (token, true);
1942
1943 }
1944 }
1945 /*
1946 	 * We assumed earlier that all statements ended with a command terminator.
1947 	 * See the comment above; here we only mark the statement as terminated
1948 	 * when the current token is a command terminator.
1949 */
1950 if (isCmdTerm(token) && ! stmtTerm)
1951 stmtTerm = true;
1952
1953 } while (! isKeyword (token, KEYWORD_end) &&
1954 ! (exit_on_endif && isKeyword (token, KEYWORD_endif) ) &&
1955 ! isType (token, TOKEN_EOF) &&
1956 ! stmtTerm );
1957 }
1958
1959 static void parseBlock (tokenInfo *const token, const bool local)
1960 {
1961 parseBlockFull (token, local, LANG_IGNORE);
1962 }
1963
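/*
 * The string-body branch below covers PostgreSQL-style definitions such as
 * the one quoted in the comment for the "promise" member above:
 *
 *   CREATE FUNCTION ... AS ' sub code_written_in_perl {... ' LANGUAGE plperl;
 *
 * The promise made while the quoted body was read is updated with whatever
 * language is named after LANGUAGE, if that keyword is found before the
 * command terminator.
 */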
1964 static void parseBlockFull (tokenInfo *const token, const bool local, langType lang)
1965 {
1966 int promise = -1;
1967
1968 if (isType (token, TOKEN_BLOCK_LABEL_BEGIN))
1969 {
1970 parseLabel (token);
1971 readToken (token);
1972 }
1973 if (! isKeyword (token, KEYWORD_begin))
1974 {
1975 readToken (token);
1976 if (isType (token, TOKEN_STRING))
1977 {
1978 /* Likely a PostgreSQL FUNCTION name AS '...'
1979 * https://www.postgresql.org/docs/current/static/sql-createfunction.html */
1980 promise = token->promise;
1981 token->promise = -1;
1982
1983 readToken (token);
1984 while (! isCmdTerm (token)
1985 && !isType (token, TOKEN_EOF))
1986 {
1987 if (lang == LANG_IGNORE &&
1988 isKeyword (token, KEYWORD_language))
1989 {
1990 readToken (token);
1991 lang = getNamedLanguageFromToken (token);
1992 if (lang != LANG_IGNORE)
1993 readToken (token);
1994 }
1995 else
1996 readToken (token);
1997 }
1998
1999 if (promise != -1 && lang != LANG_IGNORE)
2000 promiseUpdateLanguage(promise, lang);
2001 }
2002 else
2003 {
2004 /*
2005 * These are Oracle style declares which generally come
2006 * between an IS/AS and BEGIN block.
2007 */
2008 parseDeclare (token, local);
2009 }
2010 }
2011 if (isKeyword (token, KEYWORD_begin))
2012 {
2013 bool is_transaction = false;
2014
2015 readToken (token);
2016
2017 /* BEGIN of Postgresql initiates a transaction.
2018 *
2019 * BEGIN [ WORK | TRANSACTION ] [ transaction_mode [, ...] ]
2020 *
2021 * BEGIN of MySQL does the same.
2022 *
2023 * BEGIN [WORK]
2024 *
2025 * BEGIN of SQLite does the same.
2026 *
2027 * BEGIN [[DEFERRED | IMMEDIATE | EXCLUSIVE] TRANSACTION]
2028 *
2029 */
2030 if (isCmdTerm(token))
2031 {
2032 is_transaction = true;
2033 readToken (token);
2034 }
2035 else if (isType (token, TOKEN_IDENTIFIER)
2036 && (strcasecmp (vStringValue(token->string), "work") == 0
2037 || strcasecmp (vStringValue(token->string), "transaction") == 0
2038 || (
2039 strcasecmp (vStringValue(token->string), "deferred") == 0
2040 || strcasecmp (vStringValue(token->string), "immediate") == 0
2041 || strcasecmp (vStringValue(token->string), "exclusive") == 0
2042 )
2043 ))
2044 is_transaction = true;
2045 else
2046 {
2047 /*
2048 * Check for ANSI declarations which always follow
2049 * a BEGIN statement. This routine will not advance
2050 * the token if none are found.
2051 */
2052 parseDeclareANSI (token, local);
2053 }
2054
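/* Track how deeply nested we are in BEGIN ... END blocks while the
 * statements inside this block are parsed; the counter is restored
 * once the matching END (or COMMIT for a transaction) is reached.
 */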
2055 token->begin_end_nest_lvl++;
2056 while (! isKeyword (token, KEYWORD_end) &&
2057 ! (is_transaction && isKeyword(token, KEYWORD_commit)) &&
2058 ! isType (token, TOKEN_EOF))
2059 {
2060 parseStatements (token, false);
2061
2062 if (isCmdTerm(token))
2063 readToken (token);
2064 }
2065 token->begin_end_nest_lvl--;
2066
2067 /*
2068 * Read the next token (we will assume
2069 * it is the command delimiter)
2070 */
2071 readToken (token);
2072
2073 /*
2074 * Check if the END block is terminated
2075 */
2076 if (! isCmdTerm (token))
2077 {
2078 /*
2079 * Not sure what to do here at the moment.
2080 * I think the routine that calls parseBlock
2081 * must expect the next token has already
2082 * been read since it is possible this
2083 * token is not a command delimiter.
2084 */
2085 /* findCmdTerm (token, false); */
2086 }
2087 }
2088 }
2089
2090 static void parsePackage (tokenInfo *const token)
2091 {
2092 /*
2093 * Packages can be specified in a number of ways:
2094 * CREATE OR REPLACE PACKAGE pkg_name AS
2095 * or
2096 * CREATE OR REPLACE PACKAGE owner.pkg_name AS
2097 * or by specifying a package body
2098 * CREATE OR REPLACE PACKAGE BODY pkg_name AS
2099 * CREATE OR REPLACE PACKAGE BODY owner.pkg_name AS
2100 */
2101 tokenInfo *const name = newToken ();
2102 readIdentifier (name);
2103 if (isKeyword (name, KEYWORD_body))
2104 {
2105 /*
2106 * Ignore the BODY tag since we will process
2107 * the body or prototypes in the same manner
2108 */
2109 readIdentifier (name);
2110 }
2111 /* Check for owner.pkg_name */
2112 while (! isKeyword (token, KEYWORD_is) &&
2113 ! isType (token, TOKEN_EOF))
2114 {
2115 readToken (token);
2116 if ( isType(token, TOKEN_PERIOD) )
2117 {
2118 readIdentifier (name);
2119 }
2120 }
2121 if (isKeyword (token, KEYWORD_is))
2122 {
2123 if (isType (name, TOKEN_IDENTIFIER) ||
2124 isType (name, TOKEN_STRING))
2125 {
2126 makeSqlTag (name, SQLTAG_PACKAGE);
2127 }
2128 addToScope (token, name->string, SQLTAG_PACKAGE);
2129 parseBlock (token, false);
2130 vStringClear (token->scope);
2131 token->scopeKind = SQLTAG_COUNT;
2132 }
2133 findCmdTerm (token, false);
2134 deleteToken (name);
2135 }
2136
2137 static void parseColumnsAndAliases (tokenInfo *const token)
2138 {
2139 bool columnAcceptable = true;
2140 tokenInfo *const lastId = newToken ();
2141
2142 /*
2143 * -- A
2144 * create table foo as select A;
2145 *
2146 * -- B
2147 * create table foo as select B from ...;
2148 *
2149 * -- D
2150 * create table foo as select C as D from ...;
2151 *
2152 * -- E, F
2153 * create table foo as select E, a.F;
2154 *
2155 * -- G, H
2156 * create table foo as select G, a.H from ...;
2157 *
2158 * -- J, K
2159 * create table foo as select I as J, a.K from ...;
2160 *
2161 * lastId is used for capturing A, B, E, F, G, H, and K.
2162 */
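/* columnAcceptable is cleared when a parenthesized expression or any
 * other unexpected token is seen, so expressions such as function calls
 * are not mistaken for plain column names; it only becomes true again
 * once a column or alias has been emitted.
 */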
2163 readToken (token);
2164 do
2165 {
2166 if (isType (token, TOKEN_KEYWORD)
2167 && isKeyword (token, KEYWORD_is))
2168 {
2169 readToken (token);
2170 if (isType (token, TOKEN_IDENTIFIER))
2171 {
2172 /* Emit the alias */
2173 makeSqlTag (token, SQLTAG_FIELD);
2174 columnAcceptable = true;
2175 }
2176 lastId->type = TOKEN_UNDEFINED;
2177 }
2178 else if ((isType (token, TOKEN_KEYWORD)
2179 && isKeyword (token, KEYWORD_from))
2180 || isType (token, TOKEN_SEMICOLON)
2181 || isType(token, TOKEN_COMMA))
2182 {
2183 if (lastId->type == TOKEN_IDENTIFIER)
2184 {
2185 /* Emit the column */
2186 makeSqlTag(lastId, SQLTAG_FIELD);
2187 columnAcceptable = true;
2188 }
2189
2190 if (isType(token, TOKEN_COMMA))
2191 lastId->type = TOKEN_UNDEFINED;
2192 else
2193 break;
2194 }
2195 else if (isType (token, TOKEN_OPEN_PAREN))
2196 {
2197 columnAcceptable = false;
2198 skipToMatched (token);
2199 lastId->type = TOKEN_UNDEFINED;
2200 continue;
2201 }
2202 else if (isType (token, TOKEN_PERIOD))
2203 {
2204 lastId->type = TOKEN_UNDEFINED;
2205 }
2206 else if (isType (token, TOKEN_IDENTIFIER))
2207 {
2208 if (columnAcceptable)
2209 copyToken (lastId, token);
2210 }
2211 else
2212 {
2213 columnAcceptable = false;
2214 lastId->type = TOKEN_UNDEFINED;
2215 }
2216
2217 readToken (token);
2218 } while (! isType (token, TOKEN_EOF));
2219
2220 deleteToken (lastId);
2221 }
2222
2223 /* Skip "IF NOT EXISTS"
2224 * https://dev.mysql.com/doc/refman/8.0/en/create-table.html
2225 * https://www.postgresql.org/docs/current/sql-createtable.html
2226 * https://sqlite.org/lang_createtable.html
2227 */
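/* Illustrative statements this helper is meant to recognize (assumed
 * from the dialects linked above, not taken from this file):
 *
 *     CREATE TABLE IF NOT EXISTS t1 (c1 int);
 *     CREATE SCHEMA IF NOT EXISTS AUTHORIZATION some_role;
 *
 * It returns true when the "NOT EXISTS" words (and an optional
 * AUTHORIZATION keyword) were consumed, leaving the real identifier in
 * "name" and the following token in "token".
 */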
2228 static bool parseIdAfterIfNotExists(tokenInfo *const name,
2229 tokenInfo *const token,
2230 bool authorization_following)
2231 {
2232 if (isKeyword (name, KEYWORD_if)
2233 && (isType (token, TOKEN_IDENTIFIER)
2234 && vStringLength (token->string) == 3
2235 && strcasecmp ("not", vStringValue (token->string)) == 0))
2236 {
2237 readToken (token);
2238 if (isType (token, TOKEN_IDENTIFIER)
2239 && vStringLength (token->string) == 6
2240 && strcasecmp ("exists", vStringValue (token->string)) == 0)
2241 {
2242 readIdentifier (name);
2243 if (authorization_following
2244 && isType (name, TOKEN_IDENTIFIER)
2245 && vStringLength (name->string) == 13
2246 && strcasecmp("authorization", vStringValue(name->string)) == 0)
2247 {
2248 /*
2249 * PostgreSQL:
2250 * - CREATE SCHEMA IF NOT EXISTS AUTHORIZATION role_specification
2251 */
2252 readIdentifier (name);
2253 }
2254 readToken (token);
2255 return true;
2256 }
2257 }
2258 return false;
2259 }
2260
2261 static void parseTable (tokenInfo *const token)
2262 {
2263 tokenInfo *const name = newToken ();
2264 bool emitted = false;
2265
2266 /*
2267 * This deals with these formats:
2268 * create table t1 (c1 int);
2269 * create global temporary table t2 (c1 int);
2270 * create table "t3" (c1 int);
2271 * create table bob.t4 (c1 int);
2272 * create table bob."t5" (c1 int);
2273 * create table "bob"."t6" (c1 int);
2274 * create table bob."t7" (c1 int);
2275 * Proxy tables use this format:
2276 * create existing table bob."t7" AT '...';
2277 * SQL Server and Sybase formats
2278 * create table OnlyTable (
2279 * create table dbo.HasOwner (
2280 * create table [dbo].[HasOwnerSquare] (
2281 * create table master.dbo.HasDb (
2282 * create table master..HasDbNoOwner (
2283 * create table [master].dbo.[HasDbAndOwnerSquare] (
2284 * create table [master]..[HasDbNoOwnerSquare] (
2285 * Oracle and PostgreSQL use this format:
2286 * create table FOO as select...
2287 * MySQL allows omitting "as" like:
2288 * create table FOO select...
2289 * create table FOO (...) select...
2290 * (At least) MySQL, PostgreSQL, and SQLite take "IF NOT EXISTS"
2291 * between "table" and a table name:
2292 * create table if not exists foo ...
2293 */
2294
2295 /* This could be a database, owner or table name */
2296 readIdentifier (name);
2297 readToken (token);
2298
2299 parseIdAfterIfNotExists(name, token, false);
2300
2301 if (isType (token, TOKEN_PERIOD))
2302 {
2303 /*
2304 * This could be an owner or table name.
2305 * But this is also a special case since the table can be
2306 * referenced with a blank owner:
2307 * dbname..tablename
2308 */
2309 readIdentifier (name);
2310 /* Check if a blank name was provided */
2311 if (isType (name, TOKEN_PERIOD))
2312 {
2313 readIdentifier (name);
2314 }
2315 readToken (token);
2316 if (isType (token, TOKEN_PERIOD))
2317 {
2318 /* This can only be the table name */
2319 readIdentifier (name);
2320 readToken (token);
2321 }
2322 }
2323 if (isType (token, TOKEN_OPEN_PAREN))
2324 {
2325 if (isType (name, TOKEN_IDENTIFIER) ||
2326 isType (name, TOKEN_STRING) ||
2327 (isType (name, TOKEN_KEYWORD)
2328 && (!isReservedWord (name))))
2329 {
2330 makeSqlTag (name, SQLTAG_TABLE);
2331 emitted = true;
2332
2333 vStringCopy(token->scope, name->string);
2334 token->scopeKind = SQLTAG_TABLE;
2335 parseRecord (token);
2336 vStringClear (token->scope);
2337 token->scopeKind = SQLTAG_COUNT;
2338 readToken (token);
2339 }
2340 else
2341 skipToMatched(token);
2342 }
2343 else if (isKeyword (token, KEYWORD_at))
2344 {
2345 if (isType (name, TOKEN_IDENTIFIER))
2346 {
2347 makeSqlTag (name, SQLTAG_TABLE);
2348 }
2349 }
2350
2351 if (isKeyword (token, KEYWORD_select)
2352 /* KEYWORD_is is for recognizing "as" */
2353 || isKeyword (token, KEYWORD_is))
2354 {
2355 if (isType (name, TOKEN_IDENTIFIER))
2356 {
2357 if (!emitted)
2358 makeSqlTag (name, SQLTAG_TABLE);
2359
2360 if (isKeyword (token, KEYWORD_is))
2361 readToken (token);
2362
2363 if (isKeyword (token, KEYWORD_select))
2364 {
2365 addToScope (token, name->string, SQLTAG_TABLE);
2366 parseColumnsAndAliases (token);
2367 vStringClear (token->scope);
2368 }
2369 }
2370 }
2371 findCmdTerm (token, true);
2372 deleteToken (name);
2373 }
2374
2375 static void parseIndex (tokenInfo *const token)
2376 {
2377 tokenInfo *const name = newToken ();
2378 tokenInfo *const owner = newToken ();
2379
2380 /*
2381 * This deals with these formats
2382 * create index i1 on t1(c1) create index "i2" on t1(c1)
2383 * create virtual unique clustered index "i3" on t1(c1)
2384 * create unique clustered index "i4" on t1(c1)
2385 * create clustered index "i5" on t1(c1)
2386 * create bitmap index "i6" on t1(c1)
2387 */
2388
2389 readIdentifier (name);
2390 readToken (token);
2391 if (isType (token, TOKEN_PERIOD))
2392 {
2393 readIdentifier (name);
2394 readToken (token);
2395 }
2396 if (isKeyword (token, KEYWORD_on) &&
2397 (isType (name, TOKEN_IDENTIFIER) ||
2398 isType (name, TOKEN_STRING)))
2399 {
2400 readIdentifier (owner);
2401 readToken (token);
2402 if (isType (token, TOKEN_PERIOD))
2403 {
2404 readIdentifier (owner);
2405 readToken (token);
2406 }
2407 addToScope(name, owner->string, SQLTAG_TABLE /* FIXME? */);
2408 makeSqlTag (name, SQLTAG_INDEX);
2409 }
2410 findCmdTerm (token, false);
2411 deleteToken (name);
2412 deleteToken (owner);
2413 }
2414
2415 static void parseEvent (tokenInfo *const token)
2416 {
2417 tokenInfo *const name = newToken ();
2418
2419 /*
2420 * This deals with these formats
2421 * create event e1 handler begin end;
2422 * create event "e2" handler begin end;
2423 * create event dba."e3" handler begin end;
2424 * create event "dba"."e4" handler begin end;
2425 */
2426
2427 readIdentifier (name);
2428 readToken (token);
2429 if (isType (token, TOKEN_PERIOD))
2430 {
2431 readIdentifier (name);
2432 }
2433 while (! isKeyword (token, KEYWORD_handler) &&
2434 ! isType (token, TOKEN_SEMICOLON) &&
2435 ! isType (token, TOKEN_EOF))
2436 {
2437 readToken (token);
2438 }
2439
2440 if ((isKeyword (token, KEYWORD_handler) ||
2441 isType (token, TOKEN_SEMICOLON))
2442 && (isType (name, TOKEN_IDENTIFIER) ||
2443 isType (name, TOKEN_STRING) ||
2444 (isType (name, TOKEN_KEYWORD)
2445 && (!isReservedWord (name)))))
2446 {
2447 makeSqlTag (name, SQLTAG_EVENT);
2448 }
2449
2450 if (isKeyword (token, KEYWORD_handler))
2451 {
2452 readToken (token);
2453 if (isKeyword (token, KEYWORD_begin))
2454 {
2455 parseBlock (token, true);
2456 }
2457 findCmdTerm (token, true);
2458 }
2459 deleteToken (name);
2460 }
2461
2462 static void parseTrigger (tokenInfo *const token)
2463 {
2464 tokenInfo *const name = newToken ();
2465 tokenInfo *const table = newToken ();
2466
2467 /*
2468 * This deals with these formats
2469 * create or replace trigger tr1 begin end;
2470 * create trigger "tr2" begin end;
2471 * drop trigger "droptr1";
2472 * create trigger "tr3" CALL sp_something();
2473 * create trigger "owner"."tr4" begin end;
2474 * create trigger "tr5" not valid;
2475 * create trigger "tr6" begin end;
2476 */
2477
2478 readIdentifier (name);
2479 readToken (token);
2480 if (isType (token, TOKEN_PERIOD))
2481 {
2482 readIdentifier (name);
2483 readToken (token);
2484 }
2485
2486 while (! isKeyword (token, KEYWORD_on) &&
2487 ! isType (token, TOKEN_EOF) &&
2488 ! isCmdTerm (token))
2489 {
2490 readToken (token);
2491 }
2492
2493 /*if (! isType (token, TOKEN_SEMICOLON) ) */
2494 if (! isCmdTerm (token))
2495 {
2496 readToken (table);
2497 readToken (token);
2498 if (isType (token, TOKEN_PERIOD))
2499 {
2500 readToken (table);
2501 readToken (token);
2502 }
2503
2504 while (! isKeyword (token, KEYWORD_begin) &&
2505 ! isKeyword (token, KEYWORD_call) &&
2506 ! isCmdTerm (token) &&
2507 ! isType (token, TOKEN_EOF))
2508 {
2509 if (isKeyword (token, KEYWORD_declare))
2510 {
2511 addToScope(token, name->string, SQLTAG_TRIGGER);
2512 parseDeclare(token, true);
2513 vStringClear(token->scope);
2514 token->scopeKind = SQLTAG_COUNT;
2515 }
2516 else
2517 readToken (token);
2518 }
2519
2520 if (isKeyword (token, KEYWORD_begin) ||
2521 isKeyword (token, KEYWORD_call))
2522 {
2523 addToScope(name, table->string, SQLTAG_TABLE);
2524 makeSqlTag (name, SQLTAG_TRIGGER);
2525 addToScope(token, table->string, SQLTAG_TABLE);
2526 if (isKeyword (token, KEYWORD_begin))
2527 {
2528 parseBlock (token, true);
2529 }
2530 vStringClear(token->scope);
2531 token->scopeKind = SQLTAG_COUNT;
2532 }
2533 }
2534
2535 findCmdTerm (token, true);
2536 deleteToken (name);
2537 deleteToken (table);
2538 }
2539
2540 static void parsePublication (tokenInfo *const token)
2541 {
2542 tokenInfo *const name = newToken ();
2543
2544 /*
2545 * This deals with these formats
2546 * create or replace publication pu1 ()
2547 * create publication "pu2" ()
2548 * create publication dba."pu3" ()
2549 * create publication "dba"."pu4" ()
2550 */
2551
2552 readIdentifier (name);
2553 readToken (token);
2554 if (isType (token, TOKEN_PERIOD))
2555 {
2556 readIdentifier (name);
2557 readToken (token);
2558 }
2559 if (isType (token, TOKEN_OPEN_PAREN))
2560 {
2561 if (isType (name, TOKEN_IDENTIFIER) ||
2562 isType (name, TOKEN_STRING))
2563 {
2564 makeSqlTag (name, SQLTAG_PUBLICATION);
2565 }
2566 }
2567 findCmdTerm (token, false);
2568 deleteToken (name);
2569 }
2570 static void parseService (tokenInfo *const token)
2571 {
2572 tokenInfo *const name = newToken ();
2573
2574 /*
2575 * This deals with these formats
2576 * CREATE SERVICE s1 TYPE 'HTML'
2577 * AUTHORIZATION OFF USER DBA AS
2578 * SELECT *
2579 * FROM SYS.SYSTABLE;
2580 * CREATE SERVICE "s2" TYPE 'HTML'
2581 * AUTHORIZATION OFF USER DBA AS
2582 * CALL sp_Something();
2583 */
2584
2585 readIdentifier (name);
2586 readToken (token);
2587 if (isKeyword (token, KEYWORD_type))
2588 {
2589 if (isType (name, TOKEN_IDENTIFIER) ||
2590 isType (name, TOKEN_STRING))
2591 {
2592 makeSqlTag (name, SQLTAG_SERVICE);
2593 }
2594 }
2595 findCmdTerm (token, false);
2596 deleteToken (name);
2597 }
2598
2599 static void parseDomain (tokenInfo *const token)
2600 {
2601 tokenInfo *const name = newToken ();
2602
2603 /*
2604 * This deals with these formats
2605 * CREATE DOMAIN|DATATYPE [AS] your_name ...;
2606 */
2607
2608 readIdentifier (name);
2609 if (isKeyword (name, KEYWORD_is))
2610 {
2611 readIdentifier (name);
2612 }
2613 readToken (token);
2614 if (isType (name, TOKEN_IDENTIFIER) ||
2615 isType (name, TOKEN_STRING))
2616 {
2617 makeSqlTag (name, SQLTAG_DOMAIN);
2618 }
2619 findCmdTerm (token, false);
2620 deleteToken (name);
2621 }
2622
2623 static void parseDrop (tokenInfo *const token)
2624 {
2625 /*
2626 * This deals with these formats
2627 * DROP TABLE|PROCEDURE|DOMAIN|DATATYPE name;
2628 *
2629 * Simply skip over these statements.
2630 * They are often confused with PROCEDURE prototypes
2631 * since the syntax is similar; skipping them here effectively
2632 * deals with the issue for all types.
2633 */
2634
2635 findCmdTerm (token, false);
2636 }
2637
2638 static void parseVariable (tokenInfo *const token)
2639 {
2640 tokenInfo *const name = newToken ();
2641
2642 /*
2643 * This deals with these formats
2644 * create variable varname1 integer;
2645 * create variable @varname2 integer;
2646 * create variable "varname3" integer;
2647 * drop variable @varname3;
2648 */
2649
2650 readIdentifier (name);
2651 readToken (token);
2652 if (! isType (token, TOKEN_SEMICOLON) &&
2653 (isType (name, TOKEN_IDENTIFIER) ||
2654 isType (name, TOKEN_STRING)))
2655 {
2656 makeSqlTag (name, SQLTAG_VARIABLE);
2657 }
2658 findCmdTerm (token, true);
2659
2660 deleteToken (name);
2661 }
2662
2663 static void parseSynonym (tokenInfo *const token)
2664 {
2665 tokenInfo *const name = newToken ();
2666
2667 /*
2668 * This deals with these formats
2669 * create synonym syn1 for mytable;
2670 * create synonym "syn2" for dba.mytable;
2671 * create synonym dba."syn3" for dba.mytable;
2673 */
2674
2675 readIdentifier (name);
2676 readToken (token);
2677 if (isKeyword (token, KEYWORD_for) &&
2678 (isType (name, TOKEN_IDENTIFIER) ||
2679 isType (name, TOKEN_STRING)))
2680 {
2681 makeSqlTag (name, SQLTAG_SYNONYM);
2682 }
2683 findCmdTerm (token, true);
2684
2685 deleteToken (name);
2686 }
2687
2688 static void parseView (tokenInfo *const token)
2689 {
2690 tokenInfo *const name = newToken ();
2691
2692 /*
2693 * This deals with these formats
2694 * create view VIEW;
2695 * create view VIEW as ...;
2696 * create view VIEW (...) as ...;
2697 */
2698
2699 readIdentifier (name);
2700 readToken (token);
2701 if (isType (token, TOKEN_PERIOD))
2702 {
2703 readIdentifier (name);
2704 readToken (token);
2705 }
2706 if (isType (token, TOKEN_OPEN_PAREN))
2707 {
2708 skipArgumentList(token);
2709 }
2710
2711 while (! isKeyword (token, KEYWORD_is) &&
2712 ! isType (token, TOKEN_SEMICOLON) &&
2713 ! isType (token, TOKEN_EOF))
2714 {
2715 readToken (token);
2716 }
2717
2718 if (isKeyword (token, KEYWORD_is) &&
2719 (isType (name, TOKEN_IDENTIFIER) ||
2720 isType (name, TOKEN_STRING)))
2721 {
2722 makeSqlTag (name, SQLTAG_VIEW);
2723 }
2724
2725 findCmdTerm (token, true);
2726
2727 deleteToken (name);
2728 }
2729
2730 static void parseMLTable (tokenInfo *const token)
2731 {
2732 tokenInfo *const version = newToken ();
2733 tokenInfo *const table = newToken ();
2734 tokenInfo *const event = newToken ();
2735
2736 /*
2737 * This deals with these formats
2738 * call dbo.ml_add_table_script( 'version', 'table_name', 'event',
2739 * 'some SQL statement'
2740 * );
2741 */
2742
2743 readToken (token);
2744 if (isType (token, TOKEN_OPEN_PAREN))
2745 {
2746 readToken (version);
2747 readToken (token);
2748 while (! isType (token, TOKEN_COMMA) &&
2749 ! isType (token, TOKEN_CLOSE_PAREN) &&
2750 ! isType (token, TOKEN_EOF))
2751 {
2752 readToken (token);
2753 }
2754
2755 if (isType (token, TOKEN_COMMA))
2756 {
2757 readToken (table);
2758 readToken (token);
2759 while (! isType (token, TOKEN_COMMA) &&
2760 ! isType (token, TOKEN_CLOSE_PAREN) &&
2761 ! isType (token, TOKEN_EOF))
2762 {
2763 readToken (token);
2764 }
2765
2766 if (isType (token, TOKEN_COMMA))
2767 {
2768 readToken (event);
2769
2770 if (isType (version, TOKEN_STRING) &&
2771 isType (table, TOKEN_STRING) &&
2772 isType (event, TOKEN_STRING))
2773 {
2774 addToScope(version, table->string, SQLTAG_TABLE);
2775 addToScope(version, event->string, SQLTAG_EVENT);
2776 makeSqlTag (version, SQLTAG_MLTABLE);
2777 }
2778 }
2779 if (! isType (token, TOKEN_CLOSE_PAREN))
2780 findToken (token, TOKEN_CLOSE_PAREN);
2781 }
2782 }
2783
2784 findCmdTerm (token, true);
2785
2786 deleteToken (version);
2787 deleteToken (table);
2788 deleteToken (event);
2789 }
2790
2791 static void parseMLConn (tokenInfo *const token)
2792 {
2793 tokenInfo *const version = newToken ();
2794 tokenInfo *const event = newToken ();
2795
2796 /*
2797 * This deals with these formats
2798 * call ml_add_connection_script( 'version', 'event',
2799 * 'some SQL statement'
2800 * );
2801 */
2802
2803 readToken (token);
2804 if (isType (token, TOKEN_OPEN_PAREN))
2805 {
2806 readToken (version);
2807 readToken (token);
2808 while (! isType (token, TOKEN_COMMA) &&
2809 ! isType (token, TOKEN_CLOSE_PAREN) &&
2810 ! isType (token, TOKEN_EOF))
2811 {
2812 readToken (token);
2813 }
2814
2815 if (isType (token, TOKEN_COMMA))
2816 {
2817 readToken (event);
2818
2819 if (isType (version, TOKEN_STRING) &&
2820 isType (event, TOKEN_STRING))
2821 {
2822 addToScope(version, event->string, SQLTAG_EVENT);
2823 makeSqlTag (version, SQLTAG_MLCONN);
2824 }
2825 }
2826 if (! isType (token, TOKEN_CLOSE_PAREN))
2827 findToken (token, TOKEN_CLOSE_PAREN);
2828
2829 }
2830
2831 findCmdTerm (token, true);
2832
2833 deleteToken (version);
2834 deleteToken (event);
2835 }
2836
2837 static void parseMLProp (tokenInfo *const token)
2838 {
2839 tokenInfo *const component = newToken ();
2840 tokenInfo *const prop_set_name = newToken ();
2841 tokenInfo *const prop_name = newToken ();
2842
2843 /*
2844 * This deals with these formats
2845 * ml_add_property (
2846 * 'comp_name',
2847 * 'prop_set_name',
2848 * 'prop_name',
2849 * 'prop_value'
2850 * )
2851 */
2852
2853 readToken (token);
2854 if (isType (token, TOKEN_OPEN_PAREN))
2855 {
2856 readToken (component);
2857 readToken (token);
2858 while (! isType (token, TOKEN_COMMA) &&
2859 ! isType (token, TOKEN_CLOSE_PAREN) &&
2860 ! isType (token, TOKEN_EOF))
2861 {
2862 readToken (token);
2863 }
2864
2865 if (isType (token, TOKEN_COMMA))
2866 {
2867 readToken (prop_set_name);
2868 readToken (token);
2869 while (! isType (token, TOKEN_COMMA) &&
2870 ! isType (token, TOKEN_CLOSE_PAREN) &&
2871 ! isType (token, TOKEN_EOF))
2872 {
2873 readToken (token);
2874 }
2875
2876 if (isType (token, TOKEN_COMMA))
2877 {
2878 readToken (prop_name);
2879
2880 if (isType (component, TOKEN_STRING) &&
2881 isType (prop_set_name, TOKEN_STRING) &&
2882 isType (prop_name, TOKEN_STRING))
2883 {
2884 addToScope(component, prop_set_name->string, SQLTAG_MLPROP /* FIXME */);
2885 addToScope(component, prop_name->string, SQLTAG_MLPROP /* FIXME */);
2886 makeSqlTag (component, SQLTAG_MLPROP);
2887 }
2888 }
2889 if (! isType (token, TOKEN_CLOSE_PAREN))
2890 findToken (token, TOKEN_CLOSE_PAREN);
2891 }
2892 }
2893
2894 findCmdTerm (token, true);
2895
2896 deleteToken (component);
2897 deleteToken (prop_set_name);
2898 deleteToken (prop_name);
2899 }
2900
2901 static void parseComment (tokenInfo *const token)
2902 {
2903 /*
2904 * This deals with this statement:
2905 * COMMENT TO PRESERVE FORMAT ON PROCEDURE "DBA"."test" IS
2906 * {create PROCEDURE DBA."test"()
2907 * BEGIN
2908 * signal dave;
2909 * END
2910 * }
2911 * ;
2912 * The comment can contain anything between the CURLY
2913 * braces
2914 * COMMENT ON USER "admin" IS
2915 * 'Administration Group'
2916 * ;
2917 * Or it could be a simple string with no curly braces
2918 */
2919 while (! isKeyword (token, KEYWORD_is) &&
2920 ! isType (token, TOKEN_EOF))
2921 {
2922 readToken (token);
2923 }
2924 readToken (token);
2925 if (isType(token, TOKEN_OPEN_CURLY))
2926 {
2927 findToken (token, TOKEN_CLOSE_CURLY);
2928 }
2929
2930 findCmdTerm (token, true);
2931 }
2932
2933 static void parseCCFLAGS (tokenInfo *const token)
2934 {
2935 readToken(token);
2936 if (!isType (token, TOKEN_EQUAL))
2937 {
2938 findCmdTerm (token, true);
2939 return;
2940 }
2941
2942 readToken(token);
2943 if (!isType (token, TOKEN_STRING))
2944 {
2945 findCmdTerm (token, true);
2946 return;
2947 }
2948
2949 bool in_var = true;
2950 const char *s = vStringValue(token->string);
2951 vString *ccflag = vStringNew();
2952 /* http://web.deu.edu.tr/doc/oracle/B19306_01/server.102/b14237/initparams158.htm#REFRN10261 */
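/* A typical value (illustrative, not taken from the file being parsed):
 *
 *     ALTER SESSION SET PLSQL_CCFLAGS = 'debug:TRUE, tracelevel:2';
 *
 * The loop below walks the string, collects each flag name up to the
 * ':' separator and tags it, unless the name is one of the predefined
 * inquiry directives.
 */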
2953 while (*s)
2954 {
2955 if (in_var && isIdentChar1((int)*s))
2956 vStringPut(ccflag, *s);
2957 else if (*s == ':' && !vStringIsEmpty(ccflag))
2958 {
2959 if (lookupCaseKeyword(vStringValue(ccflag), Lang_sql)
2960 != KEYWORD_inquiry_directive)
2961 {
2962 int index = makeSimpleTag(ccflag, SQLTAG_PLSQL_CCFLAGS);
2963 registerEntry(index);
2964 vStringClear(ccflag);
2965 in_var = false;
2966 }
2967 }
2968 else if (*s == ',')
2969 in_var = true;
2970 s++;
2971 }
2972 vStringDelete(ccflag);
2973
2974 }
2975
2976 static void parseDatabase (tokenInfo *const token, enum eKeywordId keyword)
2977 {
2978 tokenInfo * name;
2979
2980 /*
2981 * In MySQL and HPL/SQL, "CREATE DATABASE" and "CREATE SCHEMA"
2982 * are the same. However, in PostgreSQL, they are different.
2983 * To support PostgreSQL, we define separate kinds for them.
2984 *
2985 * MySQL
2986 * A. CREATE {DATABASE | SCHEMA} [IF NOT EXISTS] db_name ...;
2987 *
2988 * PostgreSQL
2989 *
2990 * B. CREATE DATABASE name ...;
2991 *
2992 * C. CREATE SCHEMA schema_name [ AUTHORIZATION role_specification ] [ schema_element [ ... ] ]
2993 * D. CREATE SCHEMA AUTHORIZATION role_specification [ schema_element [ ... ] ]
2994 * E. CREATE SCHEMA IF NOT EXISTS schema_name [ AUTHORIZATION role_specification ]
2995 * F. CREATE SCHEMA IF NOT EXISTS AUTHORIZATION role_specification
2996 *
2997 * HPL/SQL
2998 * G. CREATE DATABASE | SCHEMA [IF NOT EXISTS] dbname_expr...;
2999 */
3000 readIdentifier (token);
3001 if (keyword == KEYWORD_schema
3002 && isType (token, TOKEN_IDENTIFIER)
3003 && vStringLength (token->string) == 13
3004 && strcasecmp("authorization", vStringValue(token->string)) == 0)
3005 {
3006 /* D. */
3007 readIdentifier (token);
3008 makeSqlTag (token, SQLTAG_SCHEMA);
3009 findCmdTerm (token, false);
3010 return;
3011 }
3012
3013 name = newToken ();
3014 copyToken (name, token);
3015 readIdentifier (token);
3016 parseIdAfterIfNotExists (name, token, true);
3017
3018 makeSqlTag (name,
3019 keyword == KEYWORD_database
3020 ? SQLTAG_DATABASE: SQLTAG_SCHEMA);
3021 deleteToken (name);
3022
3023 /* TODO:
3024 *
3025 * In PostgreSQL, CREATE FOO can follow to CREATE SCHEMA like:
3026 *
3027 * -- https://www.postgresql.org/docs/current/sql-createschema.html
3028 *
3029 * CREATE SCHEMA hollywood
3030 * CREATE TABLE films (title text, release date, awards text[])
3031 * CREATE VIEW winners AS
3032 * SELECT title, release FROM films WHERE awards IS NOT NULL;
3033 *
3034 * In the above example, "hollywood.films" and "hollywood.winners" should be
3035 * tagged.
3036 */
3037 findCmdTerm (token, true);
3038 }
3039
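/*
 * Dispatch on the keyword that introduces a statement and hand control
 * to the matching parse* routine; keywords without special handling
 * fall through to the default case and are ignored here.
 */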
3040 static void parseKeywords (tokenInfo *const token)
3041 {
3042 switch (token->keyword)
3043 {
3044 case KEYWORD_begin: parseBlock (token, false); break;
3045 case KEYWORD_inquiry_directive:
3046 if (strcasecmp(vStringValue(token->string), "PLSQL_CCFLAGS") == 0)
3047 parseCCFLAGS (token);
3048 break;
3049 case KEYWORD_comment: parseComment (token); break;
3050 case KEYWORD_cursor: parseSimple (token, SQLTAG_CURSOR); break;
3051 case KEYWORD_database: parseDatabase (token, KEYWORD_database); break;
3052 case KEYWORD_datatype: parseDomain (token); break;
3053 case KEYWORD_declare: parseBlock (token, false); break;
3054 case KEYWORD_domain: parseDomain (token); break;
3055 case KEYWORD_drop: parseDrop (token); break;
3056 case KEYWORD_event: parseEvent (token); break;
3057 case KEYWORD_extension: findCmdTerm (token, false); break;
3058 case KEYWORD_function: parseSubProgram (token); break;
3059 case KEYWORD_if: parseStatements (token, false); break;
3060 case KEYWORD_index: parseIndex (token); break;
3061 case KEYWORD_ml_table: parseMLTable (token); break;
3062 case KEYWORD_ml_table_lang: parseMLTable (token); break;
3063 case KEYWORD_ml_table_dnet: parseMLTable (token); break;
3064 case KEYWORD_ml_table_java: parseMLTable (token); break;
3065 case KEYWORD_ml_table_chk: parseMLTable (token); break;
3066 case KEYWORD_ml_conn: parseMLConn (token); break;
3067 case KEYWORD_ml_conn_lang: parseMLConn (token); break;
3068 case KEYWORD_ml_conn_dnet: parseMLConn (token); break;
3069 case KEYWORD_ml_conn_java: parseMLConn (token); break;
3070 case KEYWORD_ml_conn_chk: parseMLConn (token); break;
3071 case KEYWORD_ml_prop: parseMLProp (token); break;
3072 case KEYWORD_package: parsePackage (token); break;
3073 case KEYWORD_procedure: parseSubProgram (token); break;
3074 case KEYWORD_publication: parsePublication (token); break;
3075 case KEYWORD_schema: parseDatabase (token, KEYWORD_schema); break;
3076 case KEYWORD_service: parseService (token); break;
3077 case KEYWORD_subtype: parseSimple (token, SQLTAG_SUBTYPE); break;
3078 case KEYWORD_synonym: parseSynonym (token); break;
3079 case KEYWORD_table: parseTable (token); break;
3080 case KEYWORD_trigger: parseTrigger (token); break;
3081 case KEYWORD_type: parseType (token); break;
3082 case KEYWORD_variable: parseVariable (token); break;
3083 case KEYWORD_view: parseView (token); break;
3084 case KEYWORD_with: readToken (token); break; /* skip next token */
3085 case KEYWORD_without: readToken (token); break; /* skip next token */
3086 default: break;
3087 }
3088 }
3089
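/*
 * Top-level driver: keep reading tokens, parsing block labels and
 * keyword-introduced statements, until an END keyword or the end of
 * the input file is reached.
 */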
3090 static tokenType parseSqlFile (tokenInfo *const token)
3091 {
3092 do
3093 {
3094 readToken (token);
3095
3096 if (isType (token, TOKEN_BLOCK_LABEL_BEGIN))
3097 parseLabel (token);
3098 else
3099 parseKeywords (token);
3100 } while (! isKeyword (token, KEYWORD_end) &&
3101 ! isType (token, TOKEN_EOF));
3102
3103 return token->type;
3104 }
3105
3106 static void initialize (const langType language)
3107 {
3108 Assert (ARRAY_SIZE (SqlKinds) == SQLTAG_COUNT);
3109 Lang_sql = language;
3110 addKeywordGroup (&predefinedInquiryDirective, language);
3111 }
3112
3113 static void findSqlTags (void)
3114 {
3115 tokenInfo *const token = newToken ();
3116
3117 while (parseSqlFile (token) != TOKEN_EOF);
3118
3119 deleteToken (token);
3120 }
3121
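/*
 * Parser registration: declares the handled file extensions and
 * aliases, the kind and keyword tables, and requests cork queue and
 * symbol-table support, which parseCCFLAGS relies on when registering
 * PLSQL_CCFLAGS entries.
 */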
3122 extern parserDefinition* SqlParser (void)
3123 {
3124 static const char *const extensions [] = { "sql", NULL };
3125 static const char *const aliases [] = {"pgsql", NULL };
3126 parserDefinition* def = parserNew ("SQL");
3127 def->kindTable = SqlKinds;
3128 def->kindCount = ARRAY_SIZE (SqlKinds);
3129 def->extensions = extensions;
3130 def->aliases = aliases;
3131 def->parser = findSqlTags;
3132 def->initialize = initialize;
3133 def->keywordTable = SqlKeywordTable;
3134 def->keywordCount = ARRAY_SIZE (SqlKeywordTable);
3135 def->useCork = CORK_QUEUE | CORK_SYMTAB;
3136 return def;
3137 }
3138