diff --git a/executable_semantics/syntax/README.md b/executable_semantics/syntax/README.md index f1854c0b5a111..ff7e65a712ac9 100644 --- a/executable_semantics/syntax/README.md +++ b/executable_semantics/syntax/README.md @@ -15,3 +15,134 @@ techniques can be applied to other kinds of AST nodes as needed. See the handling of the `UNIMPL_EXAMPLE` token for an example of how this is done, and see [`unimplemented_example_test.cpp`](unimplemented_example_test.cpp) for an example of how to test it. + +## Precedence and associativity + +The [Bison expression grammar](parser.ypp) uses the +[precedence climbing method](https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method) +to model precedence and associativity, suitably modified to handle Carbon's +partial precedence order without grammar ambiguities. + +Consider this example +[precedence diagram](/docs/design/expressions/README.md#precedence): + +```mermaid +graph BT + %%{init: {'themeVariables': {'fontFamily': 'monospace'}}}%% + minus["minus<br> -x"] + mul>"mul<br> x * y"] + add>"add<br> x + y"] + mod["mod<br> x % y"] + eq["eq<br> x = y"] + + eq --> add & mod + add --> mul + mul & mod --> minus +``` + +For each precedence level, we have up to three grammar productions: + +- `foo_expression` represents an expression at that precedence level or + higher, and includes as productions all of the expression kinds that are + immediately higher in the precedence graph: + ```bison + add_expression: + mul_expression | add_lhs '+' add_operand ; + ``` +- `foo_operand` represents an operand of a `foo_expression` that is not itself + a `foo_expression`. + ```bison + eq_operand: + add_expression | mod_expression ; + ``` +- For left-associative operators, `foo_lhs` represents either a `foo_operand` + or a `foo_expression`. + ```bison + add_lhs: + add_operand | add_expression ; + ``` + +The above approach leads to (benign) reduce-reduce conflicts. In our example +precedence diagram, the expression `-x = y` has two different parses: + +- _eq_expression_ + - _eq_operand_ + - _add_expression_ + - _mul_expression_ + - _minus_expression_ + - `-` + - `x` + - `=` + - _eq_operand_ + - ... + - `y` + +and + +- _eq_expression_ + - _eq_operand_ + - _mod_expression_ + - _minus_expression_ + - `-` + - `x` + - `=` + - _eq_operand_ + - ... + - `y` + +These would invoke the same parsing actions, so the states can be combined, but +Bison isn't smart enough to see that. + +In order to eliminate these conflicts, if there are multiple paths through the +precedence graph between a higher-precedence level `foo` and some lower +precedence level `bar` -- that is, if there's a diamond in the precedence graph +with `foo` at the top and `bar` at the bottom -- `foo_expression`s are excluded +from all intermediate `_expression` productions on the diamond between `foo` and +`bar`, and are added back in the downstream `_operand` productions in the +diamond instead: + +```bison +minus_expression: + identifier | '-' identifier ; + +// In the real grammar, trivial productions like this are inlined. 
+mul_operand: + minus_expression ; +mul_lhs: + mul_operand | mul_expression ; +// A minus_expression is not a mul_expression, even though it's a +// higher-precedence expression, because there are multiple paths from +// eq_expression to minus_expression, and this production is on such a path. +mul_expression: + mul_lhs '*' mul_operand ; + +// minus_expression is listed here because it is excluded from mul_expression. +add_operand: + minus_expression | mul_expression ; +// This is notionally +// add_operand | add_expression +// but that introduces another kind of reduce-reduce conflict, because there +// would be two ways to interpret a mul_expression as an add_lhs. +add_lhs: + minus_expression | add_expression ; +// A mul_expression is an add_expression, because multiplication is +// higher-precedence, and mul is not at the top of a diamond in the precedence +// graph. minus_expression is excluded because we are within a diamond with it +// at the top. +add_expression: + mul_expression | add_lhs '+' add_operand ; + +mod_operand: + minus_expression ; +mod_expression: + mod_operand '%' mod_operand ; + +// We add back minus_expression here because it was excluded from add_expression +// and mod_expression. +eq_operand: + minus_expression | add_expression | mod_expression ; +// We also include minus_expression here because this is the bottom of the +// precedence diamond. +eq_expression: + minus_expression | add_expression | mod_expression | eq_operand '=' eq_operand ; +``` diff --git a/executable_semantics/syntax/parser.ypp b/executable_semantics/syntax/parser.ypp index f775e3ca365f0..095cdef81fc96 100644 --- a/executable_semantics/syntax/parser.ypp +++ b/executable_semantics/syntax/parser.ypp @@ -46,6 +46,8 @@ %parse-param {std::optional* ast} // No shift-reduce conflicts are expected. +// See README.md#precedence-and-associativity for a description of how +// operator precedence is expressed. 
%expect 0 // ----------------------------------------------------------------------------- @@ -109,6 +111,29 @@ %type > nonempty_block %type > block %type >> statement_list +%type > primary_expression +%type > postfix_expression +%type > ref_deref_expression +%type > type_expression +%type > fn_type_expression +%type > minus_expression +%type > multiplicative_operand +%type > multiplicative_lhs +%type > multiplicative_expression +%type > additive_operand +%type > additive_lhs +%type > additive_expression +%type > unimpl_expression +%type > value_expression +%type > comparison_operand +%type > comparison_expression +%type > not_expression +%type > predicate_expression +%type > and_or_operand +%type > and_lhs +%type > and_expression +%type > or_lhs +%type > or_expression %type > expression %type > generic_binding %type >> deduced_params @@ -211,31 +236,6 @@ BINARY_STAR "binary *" ; -%precedence FNARROW -%precedence LEFT_CURLY_BRACE RIGHT_CURLY_BRACE -%precedence COLON_BANG COLON COMMA DOUBLE_ARROW -%left OR AND -%nonassoc EQUAL_EQUAL -%left PLUS MINUS -%left BINARY_STAR -%precedence NOT UNARY_MINUS PREFIX_STAR AMPERSAND -// We need to give the `UNARY_STAR` token a precedence, rather than overriding -// the precedence of the `expression UNARY_STAR` rule below, because bison -// compares the precedence of the final token (for a shift) to the precedence -// of the other rule (for a reduce) when attempting to resolve a shift-reduce -// conflict. See https://stackoverflow.com/a/26188429/1041090. When UNARY_STAR -// is the final token of a rule, it must be a postfix usage, so we give it the -// same precedence as POSTFIX_STAR. 
-%precedence POSTFIX_STAR UNARY_STAR -%left PERIOD ARROW -%nonassoc UNIMPL_EXAMPLE -%precedence - LEFT_PARENTHESIS - RIGHT_PARENTHESIS - LEFT_SQUARE_BRACKET - RIGHT_SQUARE_BRACKET -; - %start input %% input: package_directive import_directives declaration_list @@ -275,13 +275,9 @@ api_or_impl: | IMPL { $$ = false; } ; -expression: +primary_expression: identifier { $$ = arena->New(context.source_loc(), $1); } -| expression designator - { $$ = arena->New(context.source_loc(), $1, $2); } -| expression LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET - { $$ = arena->New(context.source_loc(), $1, $3); } | integer_literal { $$ = arena->New(context.source_loc(), $1); } | string_literal @@ -309,96 +305,184 @@ expression: | paren_expression { $$ = $1; } | struct_literal { $$ = $1; } | struct_type_literal { $$ = $1; } +; +postfix_expression: + primary_expression +| postfix_expression designator + { $$ = arena->New(context.source_loc(), $1, $2); } +| postfix_expression LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET + { $$ = arena->New(context.source_loc(), $1, $3); } | intrinsic_identifier tuple { $$ = arena->New($1, $2, context.source_loc()); } -| expression EQUAL_EQUAL expression +| postfix_expression tuple + { $$ = arena->New(context.source_loc(), $1, $2); } +| postfix_expression POSTFIX_STAR { $$ = arena->New( - context.source_loc(), Operator::Eq, - std::vector>({$1, $3})); + context.source_loc(), Operator::Ptr, + std::vector>({$1})); } -| expression PLUS expression +| postfix_expression UNARY_STAR { $$ = arena->New( - context.source_loc(), Operator::Add, - std::vector>({$1, $3})); + context.source_loc(), Operator::Ptr, + std::vector>({$1})); } -| expression MINUS expression +; +ref_deref_expression: + postfix_expression +| PREFIX_STAR ref_deref_expression { $$ = arena->New( - context.source_loc(), Operator::Sub, - std::vector>({$1, $3})); + context.source_loc(), Operator::Deref, + std::vector>({$2})); } -| expression BINARY_STAR expression +| UNARY_STAR 
ref_deref_expression { $$ = arena->New( - context.source_loc(), Operator::Mul, - std::vector>({$1, $3})); + context.source_loc(), Operator::Deref, + std::vector>({$2})); } -| expression AND expression +| AMPERSAND ref_deref_expression { $$ = arena->New( - context.source_loc(), Operator::And, - std::vector>({$1, $3})); + context.source_loc(), Operator::AddressOf, + std::vector>({$2})); } -| expression OR expression +; +fn_type_expression: + FN_TYPE tuple ARROW type_expression + { $$ = arena->New(context.source_loc(), $2, $4); } +; +type_expression: + ref_deref_expression +| fn_type_expression +; +minus_expression: + // ref_deref_expression excluded due to precedence diamond. + MINUS ref_deref_expression { $$ = arena->New( - context.source_loc(), Operator::Or, - std::vector>({$1, $3})); + context.source_loc(), Operator::Neg, + std::vector>({$2})); } -| NOT expression +; +multiplicative_operand: + ref_deref_expression +| minus_expression +; +multiplicative_lhs: + ref_deref_expression +| multiplicative_expression +; +multiplicative_expression: + minus_expression +| multiplicative_lhs BINARY_STAR multiplicative_operand { $$ = arena->New( - context.source_loc(), Operator::Not, - std::vector>({$2})); + context.source_loc(), Operator::Mul, + std::vector>({$1, $3})); } -| MINUS expression %prec UNARY_MINUS +; +additive_operand: + ref_deref_expression +| multiplicative_expression +; +additive_lhs: + ref_deref_expression +| additive_expression +; +additive_expression: + multiplicative_expression +| additive_lhs PLUS additive_operand { $$ = arena->New( - context.source_loc(), Operator::Neg, - std::vector>({$2})); + context.source_loc(), Operator::Add, + std::vector>({$1, $3})); } -| PREFIX_STAR expression +| additive_lhs MINUS additive_operand { $$ = arena->New( - context.source_loc(), Operator::Deref, - std::vector>({$2})); + context.source_loc(), Operator::Sub, + std::vector>({$1, $3})); } -| UNARY_STAR expression %prec PREFIX_STAR +; +unimpl_expression: + // 
ref_deref_expression excluded due to precedence diamond. + ref_deref_expression UNIMPL_EXAMPLE ref_deref_expression { - $$ = arena->New( - context.source_loc(), Operator::Deref, - std::vector>({$2})); + $$ = arena->New(context.source_loc(), + "ExampleInfix", $1, $3); } -| AMPERSAND expression +; +value_expression: + // ref_deref_expression excluded due to precedence diamond. + additive_expression +| fn_type_expression +| unimpl_expression +; +comparison_operand: + ref_deref_expression +| value_expression +; +comparison_expression: + value_expression +| comparison_operand EQUAL_EQUAL comparison_operand { $$ = arena->New( - context.source_loc(), Operator::AddressOf, - std::vector>({$2})); + context.source_loc(), Operator::Eq, + std::vector>({$1, $3})); } -| expression tuple - { $$ = arena->New(context.source_loc(), $1, $2); } -| expression POSTFIX_STAR +; +not_expression: + NOT ref_deref_expression { $$ = arena->New( - context.source_loc(), Operator::Ptr, - std::vector>({$1})); + context.source_loc(), Operator::Not, + std::vector>({$2})); } -| expression UNARY_STAR +; +predicate_expression: + // ref_deref_expression excluded due to precedence diamond. + not_expression +| comparison_expression +; +and_or_operand: + ref_deref_expression +| predicate_expression +; +and_lhs: + and_or_operand +| and_expression +; +and_expression: + // predicate_expression excluded due to precedence diamond. + and_lhs AND and_or_operand { $$ = arena->New( - context.source_loc(), Operator::Ptr, - std::vector>({$1})); + context.source_loc(), Operator::And, + std::vector>({$1, $3})); } -| FN_TYPE tuple ARROW expression - { $$ = arena->New(context.source_loc(), $2, $4); } -| expression UNIMPL_EXAMPLE expression +; +or_lhs: + and_or_operand +| or_expression +; +or_expression: + // predicate_expression excluded due to precedence diamond. 
+ or_lhs OR and_or_operand { - $$ = arena->New(context.source_loc(), - "ExampleInfix", $1, $3); + $$ = arena->New( + context.source_loc(), Operator::Or, + std::vector>({$1, $3})); } ; +expression: + ref_deref_expression +| predicate_expression +| and_expression +| or_expression +; designator: PERIOD identifier { $$ = $2; } ; paren_expression: paren_expression_base @@ -481,7 +565,7 @@ non_expression_pattern: { $$ = arena->New(context.source_loc(), $1, $3); } | paren_pattern { $$ = $1; } -| expression tuple_pattern +| postfix_expression tuple_pattern { $$ = arena->New(context.source_loc(), $1, $2); } ; binding_lhs: @@ -643,9 +727,9 @@ nonempty_block: return_term: // Empty { $$ = ReturnTerm::Omitted(context.source_loc()); } -| ARROW AUTO %prec FNARROW +| ARROW AUTO { $$ = ReturnTerm::Auto(context.source_loc()); } -| ARROW expression %prec FNARROW +| ARROW expression { $$ = ReturnTerm::Explicit($2); } ; generic_binding: diff --git a/executable_semantics/syntax/unimplemented_example_test.cpp b/executable_semantics/syntax/unimplemented_example_test.cpp index 3426da22ca69a..7f3e1e3d8f0c9 100644 --- a/executable_semantics/syntax/unimplemented_example_test.cpp +++ b/executable_semantics/syntax/unimplemented_example_test.cpp @@ -18,14 +18,14 @@ TEST(UnimplementedExampleTest, VerifyPrecedence) { static constexpr std::string_view Program = R"( package ExecutableSemanticsTest api; fn Main() -> i32 { - return 1 __unimplemented_example_infix 2 + 3; + return 1 __unimplemented_example_infix 2 == 3; } )"; Arena arena; EXPECT_THAT(ParseFromString(&arena, "dummy.carbon", Program, false), ParsedAs(ASTDeclarations( ElementsAre(MatchesFunctionDeclaration().WithBody( - BlockContentsAre(ElementsAre(MatchesReturn(MatchesAdd( + BlockContentsAre(ElementsAre(MatchesReturn(MatchesEq( MatchesUnimplementedExpression( "ExampleInfix", ElementsAre(MatchesLiteral(1), MatchesLiteral(2))), diff --git a/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon 
b/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon index 529fcdcd831f7..31f8f6c0943ff 100644 --- a/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon +++ b/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon @@ -7,7 +7,7 @@ // RUN: %{not} %{executable_semantics} --trace %s 2>&1 | \ // RUN: %{FileCheck} --match-full-lines --allow-unused-prefixes %s // AUTOUPDATE: %{executable_semantics} %s -// CHECK: COMPILATION ERROR: {{.*}}/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon:16: syntax error, unexpected COLON +// CHECK: COMPILATION ERROR: {{.*}}/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon:16: syntax error, unexpected COLON, expecting EQUAL or SEMICOLON package ExecutableSemanticsTest api; diff --git a/executable_semantics/testdata/basic_syntax/not_compare_precedence.carbon b/executable_semantics/testdata/basic_syntax/not_compare_precedence.carbon new file mode 100644 index 0000000000000..0953c68978d64 --- /dev/null +++ b/executable_semantics/testdata/basic_syntax/not_compare_precedence.carbon @@ -0,0 +1,16 @@ +// Part of the Carbon Language project, under the Apache License v2.0 with LLVM +// Exceptions. See /LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// RUN: %{not} %{executable_semantics} %s 2>&1 | \ +// RUN: %{FileCheck} --match-full-lines --allow-unused-prefixes=false %s +// RUN: %{not} %{executable_semantics} --trace %s 2>&1 | \ +// RUN: %{FileCheck} --match-full-lines --allow-unused-prefixes %s +// AUTOUPDATE: %{executable_semantics} %s + +package ExecutableSemanticsTest api; + +fn CompareBools(a: Bool, b: Bool) -> Bool { + // CHECK: COMPILATION ERROR: {{.*}}.carbon:[[@LINE+1]]: syntax error, unexpected EQUAL_EQUAL, expecting SEMICOLON + return not a == b; +}