194e964efSNik Silver# Copyright (c) 2022 Nik Silver 294e964efSNik Silver# 394e964efSNik Silver# This source code is released for free distribution under the terms of the 494e964efSNik Silver# GNU General Public License version 2 or later. 594e964efSNik Silver# 694e964efSNik Silver# Thanks to: 794e964efSNik Silver# - Mark Skipper, for the original Elm optlib parser, which inspired this; 894e964efSNik Silver# - Samuel Stauffer, for the Thrift PEG parser, which showed me how to 994e964efSNik Silver# write a PEG parser; 1094e964efSNik Silver# - Jan Dolinár, for the Kotlin PEG parser, which also provided insight; 1194e964efSNik Silver# - Masatake YAMATO, for patience and guidance in code reviews. 1294e964efSNik Silver# 1394e964efSNik Silver# This parser generates tags for Elm. See https://elm-lang.org/docs/syntax 1494e964efSNik Silver# for language reference. 1594e964efSNik Silver# 1694e964efSNik Silver# The parser will tag items reliably at the top level. Functions 1794e964efSNik Silver# defined in let/in blocks are also tagged, but with limitations. See below. 1894e964efSNik Silver# 1994e964efSNik Silver# Kinds 2094e964efSNik Silver# - m module 2194e964efSNik Silver# - n namespace (ie a module that's renamed) 2294e964efSNik Silver# - t type 2394e964efSNik Silver# - c constructor (within a type) 2494e964efSNik Silver# - a alias 2594e964efSNik Silver# - p port 2694e964efSNik Silver# - f function 2794e964efSNik Silver# 2894e964efSNik Silver# Key/value pairs 2994e964efSNik Silver# - roles:def This is defined here. 3094e964efSNik Silver# - roles:imported This is imported here. 3194e964efSNik Silver# - type:<t> This constructor is in the scope of type <t>, which 3294e964efSNik Silver# may be dotted. Eg Main.myType. 3394e964efSNik Silver# - function:<f> This function is in the scope of function <f>, which 3494e964efSNik Silver# may be dotted. Eg Main.myFunc. 3594e964efSNik Silver# - module:<m> This is in the scope of module <m>. 
3694e964efSNik Silver# - typeref:description:<t> This function, constructor or port 3794e964efSNik Silver# has type <t>. 3894e964efSNik Silver# - moduleName:<m> This namespace has original module name <m>. 3994e964efSNik Silver# 4094e964efSNik Silver# Functions defined in let/in blocks may be tagged, with these limitations: 4194e964efSNik Silver# - the LHS (up to and including the '=') need to be on a single line; 4294e964efSNik Silver# - the LHS can only have simple parameters; 4394e964efSNik Silver# - their scope is only marked as being in the top-most function; 4494e964efSNik Silver# - any type annotation is ignored. 4594e964efSNik Silver# This should be good for 90% of inner functions. To make it totally robust 4694e964efSNik Silver# is much more complicated due to (a) Elm's clever indentation-sensitivity 4794e964efSNik Silver# and (b) limitations of the PEG parser used here. 4894e964efSNik Silver# 4994e964efSNik Silver# To do: 5094e964efSNik Silver# Maybe do: 5194e964efSNik Silver# - let/in blocks 5294e964efSNik Silver# - Allow tuples on the LHS. Eg '(val1, val2) = valFunc'. 5394e964efSNik Silver# - Inner functions' type annotations are used in the function's 5494e964efSNik Silver# type description. 5594e964efSNik Silver# - Inner functions can have more complex parameters. 5694e964efSNik Silver# - Functions 5794e964efSNik Silver# - Allow non-Latin upper and lower case. Use 5894e964efSNik Silver# https://util.unicode.org/UnicodeJsps/properties.html 5994e964efSNik Silver# combined with \p{Lu}, \p{Ll} and \p{L}. 6094e964efSNik Silver# 6194e964efSNik Silver# Won't do: 6294e964efSNik Silver# - Handle Elm's indentation properly. 
%prefix "pelm"

%auxil "struct parserCtx *"

%earlysource {
    #include "general.h"
}

%header {
    struct parserCtx;
}

%source {
#include "elm_pre.h"
#include "routines.h"

/*
 * To debug the parsing, enable the block below.
 * See https://github.com/arithy/packcc#macros
 * Parsing info is then written to STDERR.tmp in the event of a failed test.
 */

/*
static const char *dbg_str[] = { "Evaluating rule", "Matched rule", "Abandoning rule" };

#define PCC_DEBUG(auxil, event, rule, level, pos, buffer, length) \
    fprintf(stderr, "%*s%s %s @%zu [%.*s]\n", \
        (int)((level) * 2), "", dbg_str[event], rule, pos, (int)(length), buffer)
 */
}

# Top level elements -----------------------------------------------------

# The file is split into a module section followed by the main section,
# so that only one module declaration is ever considered and tagged.

file <-
    {
        ELM_INIT_MODULE_SCOPE;
    }
    TLSS?
    moduleDeclaration?
    TLSS?
    mainTopLevelStatements?
    TLSS?
    EOF

mainTopLevelStatements <-
    topLevelStatement (TLSS topLevelStatement)*

# Ordered choice: ignoreRestOfStatement is the catch-all and must stay last.

topLevelStatement <-
    importStatement
    / typeAlias
    / customType
    / portDeclaration
    / functionWithTypeAnnotation
    / functionDefinition
    / ignoreRestOfStatement

# Main Elm grammar -------------------------------------------------------

# Module declaration
#
# Functions, types and constructors listed in a module declaration are
# not tagged, so we can be relaxed about telling them apart.

moduleDeclaration <-
    ('port' _1_)? 'module' _1_ <dottedIdentifier> _1_ 'exposing' _0_ '(' exposedList ')' EOS {
        elm_module_scope_index = makeElmTagSettingScope(auxil, $1, $1s, K_MODULE, ROLE_DEFINITION_INDEX);
    }

exposedList <- _0_ exposedItem _0_ (',' _0_ exposedList)*

exposedItem <-
    exposedFieldOrType
    / exposedFunction
    / exposedItemIgnored

exposedFieldOrType <-
    <upperStartIdentifier> (_0_ '(' _0_ exposedTypeConstructorList _0_ ')')?

exposedFunction <-
    lowerStartIdentifier

exposedItemIgnored <- '.'+

exposedTypeConstructorList <-
    (upperStartIdentifier / exposedItemIgnored) _0_ (',' _0_ exposedTypeConstructorList)*

# Type alias
#
# Only the alias name is tagged; what it is an alias for is skipped.

typeAlias <-
    'type' _1_ 'alias' _1_ <upperStartIdentifier> _0_ '=' _0_ ignoreRestOfStatement {
        makeElmTag(auxil, $1, $1s, K_ALIAS, ROLE_DEFINITION_INDEX);
    }

# Custom type
#
# Includes type parameters, such as 'x' in 'type MyType x = Wrap x'.
#
# In a definition such as 'type MyType = Cons1 String Int' we
# capture 'MyType', and then for each type in each constructor
# subtype (here, 'String' and 'Int') we append a '->' and finally
# concatenate them all to get the constructor's type description,
# such as 'String -> Int -> MyType'.

customType <-
    'type' _1_ <upperStartIdentifier> (_0_ typeParameterList)? _0_ '=' _0_ {
        initElmConstructorFields(auxil, $1);
        makeElmTagSettingScope(auxil, $1, $1s, K_TYPE, ROLE_DEFINITION_INDEX);
    } constructorList EOS {
        POP_SCOPE(auxil);
        tidyElmConstructorFields(auxil);
    }

typeParameterList <- lowerStartIdentifier (_1_ lowerStartIdentifier)*

# A type could be defined as a constructor list:
#     type A = Cons1 String | Cons2 Float Float | ...
# The 'String' and the 'Float Float' etc are the constructor subtypes.
# Each 'String', 'Float', etc is a single type spec.
# But a single type spec could also be a record, a tuple or a function spec.
#
# Subtypes in constructors need to be parsed differently from types in
# type annotations and record fields. Consider these:
#     type A1Type a b = A1Cons a b               -- Line 1
#     type A2Type a b = A2Cons String a b        -- Line 2
#     type BType a b = BCons { x : A2Type a b}   -- Line 3
#     cFunc : A1Type String Int -> String        -- Line 4
# In line 1, 'a b' must be parsed as two individual types (parameterised).
# In line 2, 'String a b' must be parsed as three individual types.
# In line 3, 'A2Type a b' must be parsed as one type, even though it's
# lexically equivalent to 'String a b' on line 2.
# In line 4, 'A1Type String Int' must also be parsed as one type.
# This means the rules for parsing a constructor's subtypes have to differ
# slightly from those for the other cases. The first case is handled
# by constructorSubtypeList and singleConstructorSubtypeSpec. The second
# case is handled by singleTypeSpec.

constructorList <- <upperStartIdentifier> {
        initElmConstructorSubtypeFields(auxil);
    } _0_ <constructorSubtypeList>? {
        int r = makeElmTag(auxil, $1, $1s, K_CONSTRUCTOR, ROLE_DEFINITION_INDEX);
        addElmConstructorTypeRef(auxil, r);
    } _0_ ('|' _0_ constructorList)?

constructorSubtypeList <- singleConstructorSubtypeSpec (_0_ singleConstructorSubtypeSpec)*

singleConstructorSubtypeSpec <-
    < recordTypeSpec
    / tupleTypeSpec
    / functionTypeSpec
    / dottedIdentifier
    >
    {
        addElmConstructorSubtype(auxil, $1);
    }

singleTypeSpec <-
    recordTypeSpec
    / tupleTypeSpec
    / functionTypeSpec
    / parameterisedTypeSpec

recordTypeSpec <-
    '{' (_0_ recordRestrictionPrefix)? _0_ fieldSpec (_0_ ',' _0_ fieldSpec)* _0_ '}'
    / '{' (_0_ recordRestrictionPrefix)? _0_ '}'

recordRestrictionPrefix <-
    lowerStartIdentifier _0_ '|'

fieldSpec <-
    lowerStartIdentifier _0_ ':' _0_ singleTypeSpec

tupleTypeSpec <-
    '(' _0_ singleTypeSpec (_0_ ',' _0_ singleTypeSpec)* _0_ ')'
    / '(' _0_ ')'

parameterisedTypeSpec <-
    dottedIdentifier (_1_ (singleTypeSpec / lowerStartIdentifier))*

functionTypeSpec <-
    singleTypeSpec (_0_ '->' _0_ singleTypeSpec)+

# Port declaration

portDeclaration <-
    'port' _1_ <lowerStartIdentifier> _0_ ':' _0_ <typeAnnotation> EOS {
        int r = makeElmTag(auxil, $1, $1s, K_PORT, ROLE_DEFINITION_INDEX);
        addElmTypeRef(r, $2);
    }

# Import statement
#
# For the import statement we don't want the imported items to appear in the
# scope of the current module (ie this file), otherwise they'll be named
# wrongly. So we want to save the module scope, make the imported tags,
# then restore the module scope. We do this in two separate C code blocks,
# because the module scope needs to be saved before any of the imported tags
# are made.
#
# Also, if we create a namespace then that *does* live in the scope of the
# current module, so we'll make that tag (if needed) before saving the
# module scope.

importStatement <-
    'import' _1_ <dottedIdentifier> (_1_ 'as' _1_ <upperStartIdentifier>)? {
        // The namespace tag belongs to the file module's scope, so it is
        // made first, before that scope is saved away.
        if ($2s > 0) {
            int r = makeElmTag(auxil, $2, $2s, K_NAMESPACE, ROLE_DEFINITION_INDEX);
            attachParserFieldToCorkEntry (r, ElmFields[F_MODULENAME].ftype, $1);
        }

        // The imported module lives outside the scope of the file module,
        // so save that scope and then tag the imported module.
        ELM_SAVE_MODULE_SCOPE;
        makeElmTagSettingScope(auxil, $1, $1s, K_MODULE, ELM_MODULE_IMPORTED);
    } (_1_ 'exposing' _0_ '(' _0_ importedList _0_ ')')? EOS {
        ELM_RESTORE_MODULE_SCOPE;
    }

importedList <- importedItem _0_ (',' _0_ importedList)*

importedItem <-
    importedFunction
    / importedType
    / importedItemIgnored

importedFunction <- <lowerStartIdentifier> {
        makeElmTag(auxil, $1, $1s, K_FUNCTION, ELM_FUNCTION_EXPOSED);
    }

# When importing a type and constructors we want the constructors
# to be in the scope of the type. So we have to set the scope as the
# type first, before parsing (and making the tags for) the constructors.
# That's why the code here uses two separate C code blocks.

importedType <-
    <upperStartIdentifier> {
        makeElmTagSettingScope(auxil, $1, $1s, K_TYPE, ELM_TYPE_EXPOSED);
    } (_0_ '(' _0_ importedTypeConstructorList _0_ ')')? {
        // The type and its constructors are finished with, so pop the type
        POP_SCOPE(auxil);
    }

importedItemIgnored <- '.'+

importedTypeConstructorList <-
    (importedTypeConstructor / importedItemIgnored) _0_ (',' _0_ importedTypeConstructorList)*

importedTypeConstructor <-
    <upperStartIdentifier> {
        makeElmTag(auxil, $1, $1s, K_CONSTRUCTOR, ELM_CONSTRUCTOR_EXPOSED);
    }

# Function with a type annotation.
#
# The annotation comes first, and the function definition must follow
# immediately as the next top level statement. '<$1>' insists that the
# definition starts with the same text as the annotated name.

functionWithTypeAnnotation <-
    <lowerStartIdentifier> _0_ ':' _0_ <typeAnnotation> TLSS
    <$1> _1_ <functionParameterList>? {
        int r = makeElmTagSettingScope(auxil, $3, $3s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmTypeRef(r, $2);
        addElmSignature(r, $4);
    } _0_ '=' _0_ expression EOS {
        POP_SCOPE(auxil);
    }

typeAnnotation <-
    singleTypeSpec (_0_ '->' _0_ singleTypeSpec)*

# Function without a type annotation

functionDefinition <-
    <nonKeywordIdentifier> _0_ <functionParameterList>? {
        int r = makeElmTagSettingScope(auxil, $1, $1s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmSignature(r, $2);
    } _0_ '=' _0_ expression EOS {
        POP_SCOPE(auxil);
    }

# A function parameter list is what we define a function with. It's the
# x y z in 'fn x y z'. But of course they can be more complex, such as
# 'fn (Cons a b) ({ thing } as otherThing)' etc.

functionParameterList <- functionParameter (_0_ functionParameter)*

functionParameter <-
    plainFunctionParameter
    / tupleFunctionParameter
    / recordFunctionParameter
    / constructorFunctionParameter

plainFunctionParameter <-
    lowerStartIdentifier (_0_ asClause)?

tupleFunctionParameter <-
    '(' _0_ functionParameter (_0_ ',' _0_ functionParameter)* _0_ ')' (_0_ asClause)?

recordFunctionParameter <-
    '{' _0_ lowerStartIdentifier (_0_ ',' _0_ lowerStartIdentifier)* _0_ '}' (_0_ asClause)?

constructorFunctionParameter <-
    upperStartIdentifier (_0_ functionParameter)* (_0_ asClause)?

asClause <-
    'as' _1_ lowerStartIdentifier

# Expressions

expression <-
    (letInBlock _NL_IND_)? simpleExpression (_0_ binaryOperator _0_ expression)*

# Ordered choice: hexNumber must precede decimal (so '0x..' isn't read as
# the decimal '0'), and multilineString must precede oneLineString (so
# '"""' isn't read as an empty string followed by a quote).

simpleExpression <-
    hexNumber
    / decimal
    / multilineString
    / characterLiteral
    / oneLineString
    / tupleExpression
    / listExpression
    / recordExpression
    / caseStatement
    / ifThenElseStatement
    / anonymousFunction
    / functionCall

tupleExpression <-
    '(' _0_ expression (_0_ ',' _0_ expression)* _0_ ')'
    / '(' _0_ ')'

listExpression <-
    '[' _0_ expression (_0_ ',' _0_ expression)* _0_ ']'
    / '[' _0_ ']'

recordExpression <-
    '{' _0_
    (lowerStartIdentifier _0_ '|' _0_)?
    recordExpressionAssignment (_0_ ',' _0_ recordExpressionAssignment)* _0_
    '}'
    / '{' _0_ '}'

recordExpressionAssignment <-
    lowerStartIdentifier _0_ '=' _0_ expression

anonymousFunction <-
    '\\' _0_ functionParameterList _0_ '->' _0_ expression

functionCall <-
    ( dottedIdentifier
    / '.' lowerStartIdentifier
    / '(' binaryOperator ')'
    ) (_1_ expression)*

# Let/in block
#
# We'll treat let/in blocks very simply - we'll consider each line
# and expect the whole line either to be the start of a function
# definition (perhaps with some of its body) or its body. So something
# like 'f x y =' will have to be on one line.

letInBlock <-
    'let' _NL_IND_
    letInLine (_NL_IND_ letInLine)* _NL_IND_
    'in'

letInLine <-
    letInFunctionDefinition
    / letInBlock
    / letInFunctionBody

letInFunctionDefinition <-
    <nonKeywordIdentifier> WS* <letInFunctionParameters>? WS* '=' Non_NL* {
        int r = makeElmTag(auxil, $1, $1s, K_FUNCTION, ROLE_DEFINITION_INDEX);
        addElmSignature(r, $2);
    }

letInFunctionParameters <-
    nonKeywordIdentifier (WS+ nonKeywordIdentifier)*

# A body line is any non-empty line that doesn't start with the 'let' or
# 'in' keyword. The '!alphanumeric' makes this a whole-word test, so that
# body lines beginning with identifiers such as 'index' or 'letter' are
# still accepted, matching the convention used by the keyword rule.

letInFunctionBody <-
    !(('let' / 'in') !alphanumeric) Non_NL+

# Case statements
#
# We're going to be pretty loose with case statements, otherwise we'd
# have to follow Elm's indentation rules. So we'll just say
# the body of a case statement is a series of patterns like this:
# <something> -> <expression>. The <expression> might well swallow
# up a bit of the next case pattern (because to do otherwise requires
# following Elm's indentation rules), so that's why we just specify
# <something>.

caseStatement <-
    'case' _1_ expression _0_ 'of' _1_
    caseClauseList

caseClauseList <-
    caseClause (_1_ caseClause)*

caseClause <-
    roughCasePatternChar* '->' _0_ expression

roughCasePatternChar <-
    !('->' / TLSS / lineComment / delimitedComment / NL) .

# If/then/else statements

ifThenElseStatement <-
    'if' _1_ expression _1_
    'then' _1_ expression _1_
    'else' _1_ expression

# Binary operators
#
# Two-character operators are listed before the one-character operators
# they start with, because PEG choice takes the first match.

binaryOperator <-
    '>>' / '<<' / '|>' / '<|'
    / '//' / '++' / '::'
    / '==' / '/='
    / '&&' / '||'
    / '<=' / '>='
    / '<' / '>'
    / '+' / '-' / '*' / '/' / '^'

# Sometimes we just need to ignore the rest of the (top level) statement

ignoreRestOfStatement <-
    (multilineString / Non_WS_or_NL+) (_1_ ignoreRestOfStatement)*

multilineString <-
    '"""' (!'"""' .)* '"""'

# Low level tokens -------------------------------------------------------

# Identifiers

naiveIdentifier <- [A-Za-z_] alphanumeric*

upperStartIdentifier <- [A-Z] alphanumeric*

lowerStartIdentifier <- !keyword [a-z_] alphanumeric*

alphanumeric <- [A-Za-z0-9_]

nonKeywordIdentifier <-
    !keyword naiveIdentifier

# Each keyword is followed by '!alphanumeric' so that identifiers which
# merely start with a keyword (eg 'types', 'important') are not rejected.

keyword <-
    'type' !alphanumeric
    / 'module' !alphanumeric
    / 'port' !alphanumeric
    / 'alias' !alphanumeric
    / 'as' !alphanumeric
    / 'exposing' !alphanumeric
    / 'import' !alphanumeric
    / 'let' !alphanumeric
    / 'in' !alphanumeric
    / 'case' !alphanumeric
    / 'of' !alphanumeric
    / 'if' !alphanumeric
    / 'then' !alphanumeric
    / 'else' !alphanumeric

dottedIdentifier <- nonKeywordIdentifier ('.' nonKeywordIdentifier)*

# Numbers

decimal <-
    exponentialDecimal
    / simpleDecimal

exponentialDecimal <-
    simpleDecimal 'e' simpleInteger

simpleDecimal <-
    simpleInteger ('.' digits)?
    / '.' digits+

simpleInteger <- [-+]? digits

digits <- [0-9]+

hexNumber <- '0x' [0-9A-Fa-f]+

# One line strings and characters

oneLineString <- '"' inStringChar* '"'

# A character literal holds one string character. inStringChar excludes
# the double quote (it would end a string), but inside single quotes a
# bare double quote is a valid character, so it is allowed explicitly.

characterLiteral <- "'" ('"' / inStringChar) "'"

inStringChar <-
    !('"' / NL)
    ( inStringUnicodeChar / inStringEscapedChar / inStringPlainChar )

inStringPlainChar <-
    !('"' / '\\' / NL) .

inStringEscapedChar <-
    '\\' !('u' / NL) .

inStringUnicodeChar <-
    '\\u{' [0-9A-Fa-f]+ '}'

# Ignorable things -------------------------------------------------------

# Simple things...

WS <- [ \t]+
NL <- '\n' / '\f' / '\r' '\n'?
Non_NL <- [^\n\r\f]
Non_WS_or_NL <- [^ \t\n\r\f]
EOF <- !.

# A delimited comment is effectively "nothing", even if it spans several
# lines. But it does separate two tokens. Delimited comments may nest.
#
# A line comment can only come at the end of a line. Notice here it doesn't
# include the actual newline.

delimitedComment <- '{-' (delimitedComment / !'-}' .)* '-}'

lineComment <- '--' Non_NL*

# Elm whitespacing is a bit special...
# - Two statements are at the same level (eg at the top level, or statements
#   in the same let...in block) only if they begin with the same indentation.
# - If one line has more indentation than the previous line then it is a
#   continuation of that previous line.
# - But sometimes several statements can appear on the same line if tokens
#   make it obvious. Eg this is okay:
#       'myFunc = let f x y = x + y in f 3 4'
#
# We'll only worry about top level statements for this part. But we still
# need to know
# - when a top level statement begins; and
# - when two sequential tokens are part of the same top level statement.
#   They may be separated by a combination of whitespace, comments, and
#   newlines, but if there is a newline then that will always be followed
#   by an indent.
#
# When considering how one token relates to the next in top level statements
# we should only need three kinds of "join"s:
# - Where we need whitespace, such as 'import MyModule', but that space
#   may occur over multiple lines. If it's over multiple lines, the
#   second token needs to be somewhat in from the first column of text.
#   We'll call this _1_ - ie at least one space.
# - Where we don't need whitespace, such as 'f = 3', but that space
#   may occur over multiple lines. If it's over multiple lines then again
#   the second token needs to be somewhat in from the first column of text.
#   We'll call this _0_ - ie possibly zero space.
# - When we've got an end of statement, and the next token is some
#   meaningful code (not a comment) and starts in the first column of text.
#   Then that next token is the start of the next top level statement.
#   We'll call this TLSS, for top level statement separator.
#
# We can define _1_ as
# - The longest possible sequence of whitespace, delimited comments,
#   newlines, and line comments, as long as it ends with a whitespace
#   or a delimited comment, because then it won't be in the first column.
#
# We can define _0_ as
# - _1_ or the empty string.
#
# We can define TLSS as
# - The longest possible sequence of whitespace, delimited comments,
#   newlines, and line comments, as long as it ends with a newline or EOF
#   (and there are no more ignorable characters after that).
#
# PEG parsing tip: If we want to define a sequence like 'the longest
# sequence of As, Bs and Cs, as long as it ends with C' we define a short
# sequence like 'the longest sequence of As and Bs, then a C' and then
# define 'the longest sequence of those'.

_1_short <-
    (lineComment / NL)* (WS / delimitedComment)

_1_ <- _1_short+


_0_ <- _1_ / ''

TLSS_short <-
    (WS / lineComment / delimitedComment)* (NL / EOF)

TLSS <-
    TLSS_short+
    !(WS / lineComment / delimitedComment)

# An end of statement marks the end of a top level statement, but
# doesn't consume anything: the whole rule is a lookahead.

EOS <- &( TLSS / EOF )

# When considering lines in a let/in block we'll want to look for
# a newline and an indent. There may be some delimited comments etc
# in between.

_NL_IND_ <-
    TLSS_short+ WS+

%%
#include "elm_post.h"