# xref: /Universal-ctags/peg/elm.peg (revision fa5642e4dd4866485c417ba4997bf6e2646e8441)
194e964efSNik Silver# Copyright (c) 2022 Nik Silver
294e964efSNik Silver#
394e964efSNik Silver# This source code is released for free distribution under the terms of the
494e964efSNik Silver# GNU General Public License version 2 or later.
594e964efSNik Silver#
694e964efSNik Silver# Thanks to:
794e964efSNik Silver# - Mark Skipper, for the original Elm optlib parser, which inspired this;
894e964efSNik Silver# - Samuel Stauffer, for the Thrift PEG parser, which showed me how to
994e964efSNik Silver#   write a PEG parser;
1094e964efSNik Silver# - Jan Dolinár, for the Kotlin PEG parser, which also provided insight;
1194e964efSNik Silver# - Masatake YAMATO, for patience and guidance in code reviews.
1294e964efSNik Silver#
1394e964efSNik Silver# This parser generates tags for Elm. See https://elm-lang.org/docs/syntax
1494e964efSNik Silver# for language reference.
1594e964efSNik Silver#
1694e964efSNik Silver# The parser will tag items reliably at the top level. Functions
1794e964efSNik Silver# defined in let/in blocks are also tagged, but with limitations. See below.
1894e964efSNik Silver#
1994e964efSNik Silver# Kinds
2094e964efSNik Silver# - m module
2194e964efSNik Silver# - n namespace (ie a module that's renamed)
2294e964efSNik Silver# - t type
2394e964efSNik Silver# - c constructor (within a type)
2494e964efSNik Silver# - a alias
2594e964efSNik Silver# - p port
2694e964efSNik Silver# - f function
2794e964efSNik Silver#
2894e964efSNik Silver# Key/value pairs
2994e964efSNik Silver# - roles:def       This is defined here.
3094e964efSNik Silver# - roles:imported  This is imported here.
3194e964efSNik Silver# - type:<t>        This constructor is in the scope of type <t>, which
3294e964efSNik Silver#                   may be dotted. Eg Main.myType.
3394e964efSNik Silver# - function:<f>    This function is in the scope of function <f>, which
3494e964efSNik Silver#                   may be dotted. Eg Main.myFunc.
3594e964efSNik Silver# - module:<m>      This is in the scope of module <m>.
3694e964efSNik Silver# - typeref:description:<t>   This function, constructor or port
3794e964efSNik Silver#                   has type <t>.
3894e964efSNik Silver# - moduleName:<m>  This namespace has original module name <m>.
3994e964efSNik Silver#
4094e964efSNik Silver# Functions defined in let/in blocks may be tagged, with these limitations:
4194e964efSNik Silver# - the LHS (up to and including the '=') need to be on a single line;
4294e964efSNik Silver# - the LHS can only have simple parameters;
4394e964efSNik Silver# - their scope is only marked as being in the top-most function;
4494e964efSNik Silver# - any type annotation is ignored.
4594e964efSNik Silver# This should be good for 90% of inner functions. To make it totally robust
4694e964efSNik Silver# is much more complicated due to (a) Elm's clever indentation-sensitivity
4794e964efSNik Silver# and (b) limitations of the PEG parser used here.
4894e964efSNik Silver#
4994e964efSNik Silver# To do:
5094e964efSNik Silver# Maybe do:
5194e964efSNik Silver# - let/in blocks
5294e964efSNik Silver#   - Allow tuples on the LHS. Eg '(val1, val2) = valFunc'.
5394e964efSNik Silver#   - Inner functions' type annotations are used in the function's
5494e964efSNik Silver#     type description.
5594e964efSNik Silver#   - Inner functions can have more complex parameters.
5694e964efSNik Silver# - Functions
5794e964efSNik Silver#   - Allow non-Latin upper and lower case. Use
5894e964efSNik Silver#     https://util.unicode.org/UnicodeJsps/properties.html
5994e964efSNik Silver#     combined with \p{Lu}, \p{Ll} and \p{L}.
6094e964efSNik Silver#
6194e964efSNik Silver# Won't do:
6294e964efSNik Silver# - Handle Elm's indentation properly.
6394e964efSNik Silver
6494e964efSNik Silver
6594e964efSNik Silver%prefix "pelm"
6694e964efSNik Silver
6794e964efSNik Silver%auxil	"struct parserCtx *"
6894e964efSNik Silver
6994e964efSNik Silver%earlysource {
7094e964efSNik Silver    #include "general.h"
7194e964efSNik Silver}
7294e964efSNik Silver
7394e964efSNik Silver%header {
7494e964efSNik Silver	struct parserCtx;
7594e964efSNik Silver}
7694e964efSNik Silver
7794e964efSNik Silver%source {
7894e964efSNik Silver#include "elm_pre.h"
7994e964efSNik Silver#include "routines.h"
8094e964efSNik Silver
8194e964efSNik Silver/*
8294e964efSNik Silver * Include these lines to debug the parsing.
8394e964efSNik Silver * From https://github.com/arithy/packcc#macros
 * This will output parsing info to STDERR.tmp in the event of a failed test.
8594e964efSNik Silver */
8694e964efSNik Silver
8794e964efSNik Silver/*
8894e964efSNik Silverstatic const char *dbg_str[] = { "Evaluating rule", "Matched rule", "Abandoning rule" };
8994e964efSNik Silver
9094e964efSNik Silver#define PCC_DEBUG(auxil, event, rule, level, pos, buffer, length) \
9194e964efSNik Silver    fprintf(stderr, "%*s%s %s @%zu [%.*s]\n", \
9294e964efSNik Silver        (int)((level) * 2), "", dbg_str[event], rule, pos, (int)(length), buffer)
9394e964efSNik Silver */
9494e964efSNik Silver}
9594e964efSNik Silver
9694e964efSNik Silver# Top level elements -----------------------------------------------------
9794e964efSNik Silver
9894e964efSNik Silver# We separate the file into the module section and the main section
9994e964efSNik Silver# so that we only consider and tag one module declaration
10094e964efSNik Silver
# Entry rule: an optional module declaration, then an optional run of top
# level statements, with optional separators around each part, up to EOF.
# The leading action runs ELM_INIT_MODULE_SCOPE.
file <-
    { ELM_INIT_MODULE_SCOPE; }
    TLSS? moduleDeclaration?
    TLSS? mainTopLevelStatements?
    TLSS? EOF
11194e964efSNik Silver
# One or more top level statements, each pair separated by a TLSS.
mainTopLevelStatements <- topLevelStatement (TLSS topLevelStatement)*

# The alternatives are tried in order; ignoreRestOfStatement comes last
# as the fallback when none of the more specific rules match.
topLevelStatement <-
      importStatement
    / typeAlias
    / customType
    / portDeclaration
    / functionWithTypeAnnotation
    / functionDefinition
    / ignoreRestOfStatement
12394e964efSNik Silver
12494e964efSNik Silver# Main Elm grammar -------------------------------------------------------
12594e964efSNik Silver
12694e964efSNik Silver# Module declaration
12794e964efSNik Silver#
12894e964efSNik Silver# We can be a bit relaxed about distinguishing functions, types and
12994e964efSNik Silver# constructors listed in a module declaration, because we're not going
13094e964efSNik Silver# to tag them.
13194e964efSNik Silver
# '[port] module <Name> exposing (...)'. The module name is tagged and
# the value returned by makeElmTagSettingScope is stored in
# elm_module_scope_index.
moduleDeclaration <-
    ('port' _1_)? 'module' _1_ <dottedIdentifier> _1_
    'exposing' _0_ '(' exposedList ')' EOS
    {
        elm_module_scope_index =
            makeElmTagSettingScope(auxil, $1, $1s, K_MODULE, ROLE_DEFINITION_INDEX);
    }
13694e964efSNik Silver
# A comma-separated list of exposed items; the tail is matched by
# recursing into exposedList.
exposedList <-
    _0_ exposedItem _0_
    (',' _0_ exposedList)*

exposedItem <-
      exposedFieldOrType
    / exposedFunction
    / exposedItemIgnored

# An upper-case name, optionally followed by a parenthesised list of
# constructors.
exposedFieldOrType <-
    <upperStartIdentifier>
    (_0_ '(' _0_ exposedTypeConstructorList _0_ ')')?

exposedFunction <- lowerStartIdentifier

# One or more dots, as in '(..)'.
exposedItemIgnored <- '.'+

exposedTypeConstructorList <-
    (upperStartIdentifier / exposedItemIgnored) _0_
    (',' _0_ exposedTypeConstructorList)*
15494e964efSNik Silver
15594e964efSNik Silver# Type alias
15694e964efSNik Silver#
15794e964efSNik Silver# We don't care what the actual alias is
15894e964efSNik Silver
# Tags the alias name; everything after the '=' is skipped.
typeAlias <-
    'type' _1_ 'alias' _1_ <upperStartIdentifier> _0_ '=' _0_
    ignoreRestOfStatement
    {
        makeElmTag(auxil, $1, $1s, K_ALIAS, ROLE_DEFINITION_INDEX);
    }
16394e964efSNik Silver
16494e964efSNik Silver# Custom type
16594e964efSNik Silver#
16694e964efSNik Silver# Includes type parameters, such as 'x' in 'type MyType x = Wrap x'.
16794e964efSNik Silver#
16894e964efSNik Silver# In a definition such as 'type MyType = Cons1 String Int' we
16994e964efSNik Silver# capture 'MyType', and then for each type in each constructor
17094e964efSNik Silver# subtype (here, 'String' and 'Int') we append a '->' and finally
# concatenate them all to get the constructor's type description,
17294e964efSNik Silver# such as 'String -> Int -> MyType'
17394e964efSNik Silver
# The first action is placed before constructorList so that the type's
# tag and scope are made ahead of the constructors' tags; the second
# action pops that scope and tidies the constructor fields.
customType <-
    'type' _1_ <upperStartIdentifier> (_0_ typeParameterList)? _0_ '=' _0_
    {
        initElmConstructorFields(auxil, $1);
        makeElmTagSettingScope(auxil, $1, $1s, K_TYPE, ROLE_DEFINITION_INDEX);
    }
    constructorList EOS
    {
        POP_SCOPE(auxil);
        tidyElmConstructorFields(auxil);
    }

# Space-separated type parameters, such as 'a b' in 'type Pair a b = ...'.
typeParameterList <-
    lowerStartIdentifier (_1_ lowerStartIdentifier)*
18494e964efSNik Silver
18594e964efSNik Silver# A type could be defined as a constructor list:
18694e964efSNik Silver#     type A = Cons1 String | Cons2 Float Float | ...
18794e964efSNik Silver# The 'String' and the 'Float Float' etc are the constructor subtypes.
18894e964efSNik Silver# Each 'String', 'Float', etc is a single type spec.
18994e964efSNik Silver# But a single type spec could also be a record, a tuple or a function spec.
19094e964efSNik Silver#
19194e964efSNik Silver# Subtypes in constructors need to be parsed differently from types in
19294e964efSNik Silver# type annotations and record fields. Consider these:
19394e964efSNik Silver#     type A1Type a b = A1Cons a b              -- Line 1
19494e964efSNik Silver#     type A2Type a b = A2Cons String a b       -- Line 2
19594e964efSNik Silver#     type BType a b = BCons { x : A2Type a b}  -- Line 3
19694e964efSNik Silver#     cFunc : A1Type String Int -> String       -- Line 4
19794e964efSNik Silver# In line 1, 'a b' must be parsed as two individual types (parameterised).
19894e964efSNik Silver# In line 2, 'String a b' must be parsed as three individual types.
19994e964efSNik Silver# In line 3, 'A2Type a b' must be parsed as one type, even though it's
20094e964efSNik Silver# lexically equivalent to 'String a b' on line 2.
# In line 4, 'A1Type String Int' must also be parsed as one type.
# This means we have to have slightly different rules for parsing a
# constructor's subtypes than for the other cases. The first case is handled
20494e964efSNik Silver# by constructorSubtypeList and singleConstructorSubtypeSpec. The second
20594e964efSNik Silver# case is handled by singleTypeSpec.
20694e964efSNik Silver
# One constructor, optionally followed by '|' and further constructors
# (matched by recursion). Each constructor is tagged and then passed to
# addElmConstructorTypeRef.
constructorList <-
    <upperStartIdentifier>
    {
        initElmConstructorSubtypeFields(auxil);
    }
    _0_ <constructorSubtypeList>?
    {
        int r = makeElmTag(auxil, $1, $1s, K_CONSTRUCTOR, ROLE_DEFINITION_INDEX);
        addElmConstructorTypeRef(auxil, r);
    }
    _0_ ('|' _0_ constructorList)?

constructorSubtypeList <-
    singleConstructorSubtypeSpec (_0_ singleConstructorSubtypeSpec)*

# The text of each subtype is captured and handed to
# addElmConstructorSubtype.
singleConstructorSubtypeSpec <-
    <   recordTypeSpec
      / tupleTypeSpec
      / functionTypeSpec
      / dottedIdentifier
    >
    {
        addElmConstructorSubtype(auxil, $1);
    }
22594e964efSNik Silver
singleTypeSpec <-
      recordTypeSpec
    / tupleTypeSpec
    / functionTypeSpec
    / parameterisedTypeSpec

# A record type, eg '{ x : Int, y : Int }', '{ a | x : Int }' or '{}'.
recordTypeSpec <-
      '{' (_0_ recordRestrictionPrefix)? _0_
          fieldSpec (_0_ ',' _0_ fieldSpec)* _0_
      '}'
    / '{' (_0_ recordRestrictionPrefix)? _0_ '}'

# The 'a |' at the start of an extensible record type.
recordRestrictionPrefix <- lowerStartIdentifier _0_ '|'

fieldSpec <- lowerStartIdentifier _0_ ':' _0_ singleTypeSpec

# A tuple type, eg '(Int, String)', or the unit type '()'.
tupleTypeSpec <-
      '(' _0_ singleTypeSpec (_0_ ',' _0_ singleTypeSpec)* _0_ ')'
    / '(' _0_ ')'

# A type name followed by any number of arguments, eg 'Dict String a'.
parameterisedTypeSpec <-
    dottedIdentifier (_1_ (singleTypeSpec / lowerStartIdentifier))*

# Two or more type specs joined by '->'.
functionTypeSpec <-
    singleTypeSpec (_0_ '->' _0_ singleTypeSpec)+
25194e964efSNik Silver
25294e964efSNik Silver# Port declaration
25394e964efSNik Silver
# 'port <name> : <type>'. The port is tagged and its captured type
# annotation is passed to addElmTypeRef.
portDeclaration <-
    'port' _1_ <lowerStartIdentifier> _0_ ':' _0_ <typeAnnotation> EOS
    {
        int r = makeElmTag(auxil, $1, $1s, K_PORT, ROLE_DEFINITION_INDEX);
        addElmTypeRef(r, $2);
    }
25994e964efSNik Silver
26094e964efSNik Silver# Import statement
26194e964efSNik Silver#
26294e964efSNik Silver# For the import statement we don't want the imported items to appear in the
26394e964efSNik Silver# scope of the current module (ie this file), otherwise they'll be named
# wrongly. So we want to save the module scope, make the imported tags,
26594e964efSNik Silver# then restore the module scope. We do this in two separate C code blocks,
26694e964efSNik Silver# because the module scope needs to be saved before any of the imported tags
26794e964efSNik Silver# are made.
26894e964efSNik Silver#
26994e964efSNik Silver# Also, if we create a namespace then that *does* live in the scope of the
27094e964efSNik Silver# current module, so we'll make that tag (if needed) before saving the
27194e964efSNik Silver# module scope.
27294e964efSNik Silver
# $1 is the imported module name and $2 the optional 'as' name.
importStatement <-
    'import' _1_ <dottedIdentifier>
    (_1_ 'as' _1_ <upperStartIdentifier>)?
    {
        // The namespace tag belongs to the file module's scope, so it
        // is made before that scope is saved.
        if ($2s > 0) {
            int r = makeElmTag(auxil, $2, $2s, K_NAMESPACE, ROLE_DEFINITION_INDEX);
            attachParserFieldToCorkEntry (r, ElmFields[F_MODULENAME].ftype, $1);
        }

        // The imported module lives outside the scope of the file
        // module, so save that scope and then tag the imported module.
        ELM_SAVE_MODULE_SCOPE;
        makeElmTagSettingScope(auxil, $1, $1s, K_MODULE, ELM_MODULE_IMPORTED);
    }
    (_1_ 'exposing' _0_ '(' _0_ importedList _0_ ')')?
    EOS
    {
        ELM_RESTORE_MODULE_SCOPE;
    }
28894e964efSNik Silver
# A comma-separated list of imported items; the tail is matched by
# recursing into importedList.
importedList <-
    importedItem _0_
    (',' _0_ importedList)*

importedItem <-
      importedFunction
    / importedType
    / importedItemIgnored

importedFunction <-
    <lowerStartIdentifier>
    {
        makeElmTag(auxil, $1, $1s, K_FUNCTION, ELM_FUNCTION_EXPOSED);
    }

# When importing a type and constructors we want the constructors
# to be in the scope of the type. So we have to set the scope as the
# type first, before parsing (and making the tags for) the constructors.
# That's why the code here uses two separate C code blocks.

importedType <-
    <upperStartIdentifier>
    {
        makeElmTagSettingScope(auxil, $1, $1s, K_TYPE, ELM_TYPE_EXPOSED);
    }
    (_0_ '(' _0_ importedTypeConstructorList _0_ ')')?
    {
        // The type and its constructors are finished with, so pop the
        // type's scope
        POP_SCOPE(auxil);
    }

# One or more dots, as in '(..)'.
importedItemIgnored <- '.'+

importedTypeConstructorList <-
    (importedTypeConstructor / importedItemIgnored) _0_
    (',' _0_ importedTypeConstructorList)*

importedTypeConstructor <-
    <upperStartIdentifier>
    {
        makeElmTag(auxil, $1, $1s, K_CONSTRUCTOR, ELM_CONSTRUCTOR_EXPOSED);
    }
32294e964efSNik Silver
32394e964efSNik Silver# Function with a type annotation.
32494e964efSNik Silver#
32594e964efSNik Silver# The type is on one line, and the function must follow immediately as
32694e964efSNik Silver# the next top level statement
32794e964efSNik Silver
# Captures: $1 is the annotated name, $2 the type annotation, $3 the
# name repeated at the start of the definition (it must match $1) and
# $4 the optional parameter list.
functionWithTypeAnnotation <-
    <lowerStartIdentifier> _0_ ':' _0_ <typeAnnotation> TLSS
    <$1> _1_ <functionParameterList>?
    {
        int r = makeElmTagSettingScope(auxil, $3, $3s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmTypeRef(r, $2);
        addElmSignature(r, $4);
    }
    _0_ '=' _0_ expression EOS
    {
        POP_SCOPE(auxil);
    }

# One or more type specs joined by '->'.
typeAnnotation <-
    singleTypeSpec (_0_ '->' _0_ singleTypeSpec)*
34094e964efSNik Silver
34194e964efSNik Silver# Function without a type annotation
34294e964efSNik Silver
# The function is tagged and becomes the current scope while its body
# expression is parsed; the scope is popped at the end of the statement.
functionDefinition <-
    <nonKeywordIdentifier> _0_ <functionParameterList>?
    {
        int r = makeElmTagSettingScope(auxil, $1, $1s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmSignature(r, $2);
    }
    _0_ '=' _0_ expression EOS
    {
        POP_SCOPE(auxil);
    }
35094e964efSNik Silver
35194e964efSNik Silver# A function parameter list is what we define a function with. It's the
35294e964efSNik Silver# x y z in 'fn x y z'. But of course they can be more complex, such as
# 'fn (Cons a b) ({ thing } as otherThing)' etc.
35494e964efSNik Silver
functionParameterList <-
    functionParameter (_0_ functionParameter)*

functionParameter <-
      plainFunctionParameter
    / tupleFunctionParameter
    / recordFunctionParameter
    / constructorFunctionParameter

# A simple name, eg 'x' or 'x as y'.
plainFunctionParameter <-
    lowerStartIdentifier (_0_ asClause)?

# A parenthesised, comma-separated group of parameters, eg '(a, b)'.
tupleFunctionParameter <-
    '(' _0_ functionParameter (_0_ ',' _0_ functionParameter)* _0_ ')'
    (_0_ asClause)?

# A record destructuring, eg '{ x, y }'.
recordFunctionParameter <-
    '{' _0_ lowerStartIdentifier (_0_ ',' _0_ lowerStartIdentifier)* _0_ '}'
    (_0_ asClause)?

# A constructor name followed by its own parameters, eg 'Cons a b'.
constructorFunctionParameter <-
    upperStartIdentifier (_0_ functionParameter)*
    (_0_ asClause)?

asClause <- 'as' _1_ lowerStartIdentifier
37794e964efSNik Silver
37894e964efSNik Silver# Expressions
37994e964efSNik Silver
# An optional let/in block, then a simple expression, then any number of
# binary operators each followed by a further expression.
expression <-
    (letInBlock _NL_IND_)?
    simpleExpression
    (_0_ binaryOperator _0_ expression)*

simpleExpression <-
      hexNumber
    / decimal
    / multilineString
    / characterLiteral
    / oneLineString
    / tupleExpression
    / listExpression
    / recordExpression
    / caseStatement
    / ifThenElseStatement
    / anonymousFunction
    / functionCall

# A parenthesised expression or tuple, or the unit value '()'.
tupleExpression <-
      '(' _0_ expression (_0_ ',' _0_ expression)* _0_ ')'
    / '(' _0_ ')'

listExpression <-
      '[' _0_ expression (_0_ ',' _0_ expression)* _0_ ']'
    / '[' _0_ ']'

# A record literal or record update ('{ r | x = 1 }'), or '{}'.
recordExpression <-
      '{' _0_
          (lowerStartIdentifier _0_ '|' _0_)?
          recordExpressionAssignment (_0_ ',' _0_ recordExpressionAssignment)* _0_
      '}'
    / '{' _0_ '}'

recordExpressionAssignment <-
    lowerStartIdentifier _0_ '=' _0_ expression

# A lambda, eg '\x y -> x + y'.
anonymousFunction <-
    '\\' _0_ functionParameterList _0_ '->' _0_ expression

# A name, a field accessor ('.field') or a parenthesised operator,
# followed by any number of arguments.
functionCall <-
    (   dottedIdentifier
      / '.' lowerStartIdentifier
      / '(' binaryOperator ')'
    )
    (_1_ expression)*
42394e964efSNik Silver
42494e964efSNik Silver# Let/in block
42594e964efSNik Silver#
42694e964efSNik Silver# We'll treat let/in blocks very simply - we'll consider each line
42794e964efSNik Silver# and expect the whole line either to be the start of a function
42894e964efSNik Silver# definition (perhaps with some of its body) or its body. So something
42994e964efSNik Silver# like 'f x y =' will have to be on one line.
43094e964efSNik Silver
# 'let', then one or more lines each introduced by a newline-and-indent,
# then 'in' on its own indented line.
letInBlock <-
    'let' _NL_IND_
    letInLine (_NL_IND_ letInLine)* _NL_IND_
    'in'

# Each line is tried as the start of an inner function, then as a nested
# let/in block, and finally as a plain body line.
letInLine <-
    letInFunctionDefinition
    / letInBlock
    / letInFunctionBody

# An inner function whose name, parameters and '=' all sit on one line.
# The rest of that line is consumed. $1 is the name, $2 the parameters.
letInFunctionDefinition <-
    <nonKeywordIdentifier> WS* <letInFunctionParameters>? WS* '=' Non_NL* {
        int r = makeElmTag(auxil, $1, $1s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmSignature(r, $2);
    }

letInFunctionParameters <-
    nonKeywordIdentifier (WS+ nonKeywordIdentifier)*

# Any other non-empty line, as long as it does not begin with the keyword
# 'let' or 'in'. The '!alphanumeric' check makes sure only the whole
# keywords are excluded: a body line such as 'input |> f' or
# 'letters ++ xs' merely starts with those letters and must still be
# accepted, otherwise the let block ends early and the enclosing
# function fails to parse.
letInFunctionBody <-
    !(('let' / 'in') !alphanumeric) Non_NL+
45294e964efSNik Silver
45394e964efSNik Silver# Case statements
45494e964efSNik Silver#
45594e964efSNik Silver# We're going to be pretty loose with case statements, otherwise we'd
45694e964efSNik Silver# have to follow Elm's indentation rules. So we'll just say
45794e964efSNik Silver# the body of a case statement is a series of patterns like this:
45894e964efSNik Silver# <something> -> <expression>. The <expression> might well swallow
45994e964efSNik Silver# up a bit of the next case pattern (because to do otherwise requires
46094e964efSNik Silver# following Elm's indentation rules), so that's why we just specify
46194e964efSNik Silver# <something>.
46294e964efSNik Silver
caseStatement <-
    'case' _1_ expression _0_ 'of' _1_ caseClauseList

caseClauseList <- caseClause (_1_ caseClause)*

# A loosely-matched pattern, then '->', then the clause's expression.
caseClause <-
    roughCasePatternChar* '->' _0_ expression

# Any single character that is not the start of '->', a top level
# statement separator, a comment or a newline.
roughCasePatternChar <-
    !(   '->'
       / TLSS
       / lineComment
       / delimitedComment
       / NL
     ) .
47594e964efSNik Silver
47694e964efSNik Silver# If/then/else statements
47794e964efSNik Silver
# All three parts are required, each keyword being separated from the
# neighbouring expressions by at least one space.
ifThenElseStatement <-
    'if'   _1_ expression _1_
    'then' _1_ expression _1_
    'else' _1_ expression
48294e964efSNik Silver
48394e964efSNik Silver# Binary operators
48494e964efSNik Silver
# The two-character operators are listed ahead of the one-character
# operators they begin with, so the longer operator is tried first.
binaryOperator <-
      '>>' / '<<'
    / '|>' / '<|'
    / '//' / '++' / '::'
    / '==' / '/='
    / '&&' / '||'
    / '<=' / '>='
    / '<'  / '>'
    / '+'  / '-' / '*' / '/' / '^'
49394e964efSNik Silver
49494e964efSNik Silver# Sometimes we just need to ignore the rest of the (top level) statement
49594e964efSNik Silver
# Consumes multiline strings and runs of non-space characters, joined
# by _1_, so it stops where the statement's indented text ends.
ignoreRestOfStatement <-
    (multilineString / Non_WS_or_NL+)
    (_1_ ignoreRestOfStatement)*

# Everything between a pair of triple double-quotes.
multilineString <-
    '"""' (!'"""' .)* '"""'
50194e964efSNik Silver
50294e964efSNik Silver# Low level tokens -------------------------------------------------------
50394e964efSNik Silver
50494e964efSNik Silver# Identifiers
50594e964efSNik Silver
# Any identifier, with no check against the keyword list.
naiveIdentifier <- [a-zA-Z_] alphanumeric*

upperStartIdentifier <- [A-Z] alphanumeric*

lowerStartIdentifier <- !keyword [_a-z] alphanumeric*

alphanumeric <- [0-9A-Za-z_]

nonKeywordIdentifier <- !keyword naiveIdentifier

# A reserved word that is not merely the prefix of a longer identifier.
# No keyword here is a prefix of another, so a single shared
# '!alphanumeric' check after the alternatives is sufficient.
keyword <-
    (   'type'
      / 'module'
      / 'port'
      / 'alias'
      / 'as'
      / 'exposing'
      / 'import'
      / 'let'
      / 'in'
      / 'case'
      / 'of'
      / 'if'
      / 'then'
      / 'else'
    ) !alphanumeric

# One or more identifiers joined by dots, eg 'Html.Attributes.class'.
dottedIdentifier <-
    nonKeywordIdentifier ('.' nonKeywordIdentifier)*
53494e964efSNik Silver
53594e964efSNik Silver# Numbers
53694e964efSNik Silver
# The exponential form is tried first, as it begins with a simple decimal.
decimal <- exponentialDecimal / simpleDecimal

exponentialDecimal <- simpleDecimal 'e' simpleInteger

# Either an integer with an optional fractional part, or a bare
# fractional part such as '.5'.
simpleDecimal <-
      simpleInteger ('.' digits)?
    / '.' digits

simpleInteger <- [-+]? digits

digits <- [0-9]+

hexNumber <- '0x' [0-9a-fA-F]+
55394e964efSNik Silver
55494e964efSNik Silver# One line strings and characters
55594e964efSNik Silver
# A string that starts and ends on the same line.
oneLineString <- '"' inStringChar* '"'

# A character literal. A bare double quote is accepted explicitly here
# because '"' is a valid Elm character, whereas inStringChar (shared
# with oneLineString) refuses an unescaped double quote.
characterLiteral <- "'" ('"' / inStringChar) "'"

# One logical character inside a string: a '\u{...}' escape, any other
# backslash escape, or a plain character. It never starts at an
# unescaped double quote or a newline.
inStringChar <-
    !('"' / NL)
    ( inStringUnicodeChar / inStringEscapedChar / inStringPlainChar )

inStringPlainChar <-
    !('"' / '\\' / NL) .

# A backslash followed by any character other than 'u' or a newline.
inStringEscapedChar <-
    '\\' !('u' / NL) .

inStringUnicodeChar <-
    '\\u{' [0-9A-Fa-f]+ '}'
57294e964efSNik Silver
57394e964efSNik Silver# Ignorable things -------------------------------------------------------
57494e964efSNik Silver
57594e964efSNik Silver# Simple things...
57694e964efSNik Silver
# A run of spaces and tabs.
WS           <- [ \t]+
# A single line break: LF, FF, CR or CR LF.
NL           <- [\n\f] / '\r' '\n'?
Non_NL       <- [^\n\r\f]
Non_WS_or_NL <- [^ \t\n\r\f]
# Succeeds only when no input is left.
EOF          <- !.
58294e964efSNik Silver
58394e964efSNik Silver# A delimited comment is effectively "nothing", even if it spans several
58494e964efSNik Silver# lines. But it does separate two tokens.
58594e964efSNik Silver#
58694e964efSNik Silver# A line comment can only come at the end of a line. Notice here it doesn't
58794e964efSNik Silver# include the actual newline.
58894e964efSNik Silver
# '{- ... -}', which may contain further delimited comments.
delimitedComment <-
    '{-'
    ( delimitedComment / !'-}' . )*
    '-}'

# '--' and the rest of the line, excluding the line break itself.
lineComment <- '--' Non_NL*
59294e964efSNik Silver
59394e964efSNik Silver# Elm whitespacing is a bit special...
59494e964efSNik Silver# - Two statements are at the same level (eg at the top level, or statements
59594e964efSNik Silver#   in the same let...in block) only if they begin with the same indentation.
# - If one line has more indentation than the previous line then it is a
#   continuation of that previous line.
# - But sometimes several statements can appear on the same line if tokens
#   make it obvious. Eg this is okay:
#   'myFunc = let f x y = x + y in f 3 4'
60194e964efSNik Silver#
60294e964efSNik Silver# We'll only worry about top level statements for this part. But we still
60394e964efSNik Silver# need to know
60494e964efSNik Silver# - when a top level statement begins; and
60594e964efSNik Silver# - when two sequential tokens are part of the same top level statement.
60694e964efSNik Silver#   They may be separated by a combination of whitespace, comments, and
60794e964efSNik Silver#   newlines, but if there is a newline then that will always be followed
60894e964efSNik Silver#   by an indent.
60994e964efSNik Silver#
61094e964efSNik Silver# When considering how one token relates to the next in top level statements
61194e964efSNik Silver# we should only need three kinds of "join"s:
61294e964efSNik Silver# - Where we need whitespace, such as 'import MyModule', but that space
61394e964efSNik Silver#   may occur over multiple lines. If it's over multiple lines, the
61494e964efSNik Silver#   second token needs to be somewhat in from the first column of text.
61594e964efSNik Silver#   We'll call this _1_ - ie at least one space.
61694e964efSNik Silver# - Where we don't need whitespace, such as 'f = 3', but that space
61794e964efSNik Silver#   may occur over multiple lines. If it's over multiple lines then again
61894e964efSNik Silver#   the second token needs to be somewhat in from the first column of text.
61994e964efSNik Silver#   We'll call this _0_ - ie possibly zero space.
62094e964efSNik Silver# - When we've got an end of statement, and the next token is some
62194e964efSNik Silver#   meaningful code (not a comment) and starts in the first column of text.
62294e964efSNik Silver#   Then that next token is the start of the next top level statement.
62394e964efSNik Silver#   We'll call this TLSS, for top level statement separator.
62494e964efSNik Silver#
62594e964efSNik Silver# We can define _1_ as
62694e964efSNik Silver# - The longest possible sequence of whitespace, delimited comments,
62794e964efSNik Silver#   newlines, and line comments, as long as it ends with a whitespace
62894e964efSNik Silver#   or a delimited comment, because then it won't be in the first column.
62994e964efSNik Silver#
63094e964efSNik Silver# We can define _0_ as
63194e964efSNik Silver# - _1_ or the empty string.
63294e964efSNik Silver#
63394e964efSNik Silver# We can define TLSS as
63494e964efSNik Silver# - The longest possible sequence of whitespace, delimited comments,
63594e964efSNik Silver#   newlines, and line comments, as long as it ends with a newline or EOF
#   (and there are no more ignorable characters after that).
63794e964efSNik Silver#
63894e964efSNik Silver# PEG parsing tip: If we want to define a sequence like 'the longest
63994e964efSNik Silver# sequence of As, Bs and Cs, as long as it ends with C' we define a short
64094e964efSNik Silver# sequence like 'the longest sequence of As and Bs, then a C' and then
64194e964efSNik Silver# define 'the longest sequence of those'.
64294e964efSNik Silver
# Any number of line comments and newlines, ending in whitespace or a
# delimited comment.
_1_short <-
    (lineComment / NL)*
    (WS / delimitedComment)

_1_ <- _1_short+

# Optional version of _1_; matches the empty string when _1_ fails.
_0_ <- _1_?

# Any number of whitespace runs and comments, ending in a newline or EOF.
TLSS_short <-
    (WS / lineComment / delimitedComment)*
    (NL / EOF)

# The negative lookahead stops the separator from ending in front of
# further ignorable text.
TLSS <-
    TLSS_short+
    !(WS / lineComment / delimitedComment)

# An end of statement marks the end of a top level statement, but
# doesn't consume anything

EOS <- &(TLSS / EOF)

# When considering lines in a let/in block we'll want to look for
# a newline and an indent. There may be some delimited comments etc
# in between.

_NL_IND_ <- TLSS_short+ WS+
66994e964efSNik Silver
67094e964efSNik Silver%%
67194e964efSNik Silver#include "elm_post.h"
672