diff --git a/executable_semantics/syntax/README.md b/executable_semantics/syntax/README.md
index f1854c0b5a111..ff7e65a712ac9 100644
--- a/executable_semantics/syntax/README.md
+++ b/executable_semantics/syntax/README.md
@@ -15,3 +15,134 @@ techniques can be applied to other kinds of AST nodes as needed. See the
handling of the `UNIMPL_EXAMPLE` token for an example of how this is done, and
see [`unimplemented_example_test.cpp`](unimplemented_example_test.cpp) for an
example of how to test it.
+
+## Precedence and associativity
+
+The [Bison expression grammar](parser.ypp) uses the
+[precedence climbing method](https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method)
+to model precedence and associativity, suitably modified to handle Carbon's
+partial precedence order without grammar ambiguities.
+
+Consider this example
+[precedence diagram](/docs/design/expressions/README.md#precedence):
+
+```mermaid
+graph BT
+ %%{init: {'themeVariables': {'fontFamily': 'monospace'}}}%%
+ minus["minus<br>-x"]
+ mul>"mul<br>x * y"]
+ add>"add<br>x + y"]
+ mod["mod<br>x % y"]
+ eq["eq<br>x = y"]
+
+ eq --> add & mod
+ add --> mul
+ mul & mod --> minus
+```
+
+For each precedence level, we have up to three grammar productions:
+
+- `foo_expression` represents an expression at that precedence level or
+ higher, and includes as productions all of the expression kinds that are
+ immediately higher in the precedence graph:
+ ```bison
+ add_expression:
+ mul_expression | add_lhs '+' add_operand ;
+ ```
+- `foo_operand` represents an operand of a `foo_expression` that is not itself
+ a `foo_expression`.
+ ```bison
+ eq_operand:
+ add_expression | mod_expression ;
+ ```
+- For left-associative operators, `foo_lhs` represents either a `foo_operand`
+ or a `foo_expression`.
+ ```bison
+ add_lhs:
+ add_operand | add_expression ;
+ ```
+
+The above approach leads to (benign) reduce-reduce conflicts. In our example
+precedence diagram, the expression `-x == y` has two different parses:
+
+- _eq_expression_
+ - _eq_operand_
+ - _add_expression_
+ - _mul_expression_
+ - _minus_expression_
+ - `-`
+ - `x`
+ - `==`
+ - _eq_operand_
+ - ...
+ - `y`
+
+and
+
+- _eq_expression_
+ - _eq_operand_
+ - _mod_expression_
+ - _minus_expression_
+ - `-`
+ - `x`
+ - `==`
+ - _eq_operand_
+ - ...
+ - `y`
+
+These would invoke the same parsing actions, so the states can be combined, but
+Bison isn't smart enough to see that.
+
+In order to eliminate these conflicts, if there are multiple paths through the
+precedence graph between a higher-precedence level `foo` and some lower
+precedence level `bar` -- that is, if there's a diamond in the precedence graph
+with `foo` at the top and `bar` at the bottom -- `foo_expression`s are excluded
+from all intermediate `_expression` productions on the diamond between `foo` and
+`bar`, and are added back in the downstream `_operand` productions in the
+diamond instead:
+
+```bison
+minus_expression:
+ identifier | '-' identifier ;
+
+// In the real grammar, trivial productions like this are inlined.
+mul_operand:
+ minus_expression ;
+mul_lhs:
+ mul_operand | mul_expression ;
+// A minus_expression is not a mul_expression, even though it's a
+// higher-precedence expression, because there are multiple paths from
+// eq_expression to minus_expression, and this production is on such a path.
+mul_expression:
+ mul_lhs '*' mul_operand ;
+
+// minus_expression is listed here because it is excluded from mul_expression.
+add_operand:
+ minus_expression | mul_expression ;
+// This is notionally
+// add_operand | add_expression
+// but that introduces another kind of reduce-reduce conflict, because there
+// would be two ways to interpret a mul_expression as an add_lhs.
+add_lhs:
+ minus_expression | add_expression ;
+// A mul_expression is an add_expression, because multiplication is
+// higher-precedence, and mul is not at the top of a diamond in the precedence
+// graph. minus_expression is excluded because we are within a diamond with it
+// at the top.
+add_expression:
+ mul_expression | add_lhs '+' add_operand ;
+
+mod_operand:
+ minus_expression ;
+mod_expression:
+ mod_operand '%' mod_operand ;
+
+// We add back minus_expression here because it was excluded from add_expression
+// and mod_expression.
+eq_operand:
+ minus_expression | add_expression | mod_expression ;
+// We also include minus_expression here because this is the bottom of the
+// precedence diamond.
+eq_expression:
+ minus_expression | add_expression | mod_expression | eq_operand '=' eq_operand ;
+```
diff --git a/executable_semantics/syntax/parser.ypp b/executable_semantics/syntax/parser.ypp
index f775e3ca365f0..095cdef81fc96 100644
--- a/executable_semantics/syntax/parser.ypp
+++ b/executable_semantics/syntax/parser.ypp
@@ -46,6 +46,8 @@
%parse-param {std::optional* ast}
// No shift-reduce conflicts are expected.
+// See README.md#precedence-and-associativity for a description of how
+// operator precedence is expressed.
%expect 0
// -----------------------------------------------------------------------------
@@ -109,6 +111,29 @@
%type > nonempty_block
%type > block
%type >> statement_list
+%type > primary_expression
+%type > postfix_expression
+%type > ref_deref_expression
+%type > type_expression
+%type > fn_type_expression
+%type > minus_expression
+%type > multiplicative_operand
+%type > multiplicative_lhs
+%type > multiplicative_expression
+%type > additive_operand
+%type > additive_lhs
+%type > additive_expression
+%type > unimpl_expression
+%type > value_expression
+%type > comparison_operand
+%type > comparison_expression
+%type > not_expression
+%type > predicate_expression
+%type > and_or_operand
+%type > and_lhs
+%type > and_expression
+%type > or_lhs
+%type > or_expression
%type > expression
%type > generic_binding
%type >> deduced_params
@@ -211,31 +236,6 @@
BINARY_STAR "binary *"
;
-%precedence FNARROW
-%precedence LEFT_CURLY_BRACE RIGHT_CURLY_BRACE
-%precedence COLON_BANG COLON COMMA DOUBLE_ARROW
-%left OR AND
-%nonassoc EQUAL_EQUAL
-%left PLUS MINUS
-%left BINARY_STAR
-%precedence NOT UNARY_MINUS PREFIX_STAR AMPERSAND
-// We need to give the `UNARY_STAR` token a precedence, rather than overriding
-// the precedence of the `expression UNARY_STAR` rule below, because bison
-// compares the precedence of the final token (for a shift) to the precedence
-// of the other rule (for a reduce) when attempting to resolve a shift-reduce
-// conflict. See https://stackoverflow.com/a/26188429/1041090. When UNARY_STAR
-// is the final token of a rule, it must be a postfix usage, so we give it the
-// same precedence as POSTFIX_STAR.
-%precedence POSTFIX_STAR UNARY_STAR
-%left PERIOD ARROW
-%nonassoc UNIMPL_EXAMPLE
-%precedence
- LEFT_PARENTHESIS
- RIGHT_PARENTHESIS
- LEFT_SQUARE_BRACKET
- RIGHT_SQUARE_BRACKET
-;
-
%start input
%%
input: package_directive import_directives declaration_list
@@ -275,13 +275,9 @@ api_or_impl:
| IMPL
{ $$ = false; }
;
-expression:
+primary_expression:
identifier
{ $$ = arena->New(context.source_loc(), $1); }
-| expression designator
- { $$ = arena->New(context.source_loc(), $1, $2); }
-| expression LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET
- { $$ = arena->New(context.source_loc(), $1, $3); }
| integer_literal
{ $$ = arena->New(context.source_loc(), $1); }
| string_literal
@@ -309,96 +305,184 @@ expression:
| paren_expression { $$ = $1; }
| struct_literal { $$ = $1; }
| struct_type_literal { $$ = $1; }
+;
+postfix_expression:
+ primary_expression
+| postfix_expression designator
+ { $$ = arena->New(context.source_loc(), $1, $2); }
+| postfix_expression LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET
+ { $$ = arena->New(context.source_loc(), $1, $3); }
| intrinsic_identifier tuple
{ $$ = arena->New($1, $2, context.source_loc()); }
-| expression EQUAL_EQUAL expression
+| postfix_expression tuple
+ { $$ = arena->New(context.source_loc(), $1, $2); }
+| postfix_expression POSTFIX_STAR
{
$$ = arena->New(
- context.source_loc(), Operator::Eq,
- std::vector>({$1, $3}));
+ context.source_loc(), Operator::Ptr,
+ std::vector>({$1}));
}
-| expression PLUS expression
+| postfix_expression UNARY_STAR
{
$$ = arena->New(
- context.source_loc(), Operator::Add,
- std::vector>({$1, $3}));
+ context.source_loc(), Operator::Ptr,
+ std::vector>({$1}));
}
-| expression MINUS expression
+;
+ref_deref_expression:
+ postfix_expression
+| PREFIX_STAR ref_deref_expression
{
$$ = arena->New(
- context.source_loc(), Operator::Sub,
- std::vector>({$1, $3}));
+ context.source_loc(), Operator::Deref,
+ std::vector>({$2}));
}
-| expression BINARY_STAR expression
+| UNARY_STAR ref_deref_expression
{
$$ = arena->New(
- context.source_loc(), Operator::Mul,
- std::vector>({$1, $3}));
+ context.source_loc(), Operator::Deref,
+ std::vector>({$2}));
}
-| expression AND expression
+| AMPERSAND ref_deref_expression
{
$$ = arena->New(
- context.source_loc(), Operator::And,
- std::vector>({$1, $3}));
+ context.source_loc(), Operator::AddressOf,
+ std::vector>({$2}));
}
-| expression OR expression
+;
+fn_type_expression:
+ FN_TYPE tuple ARROW type_expression
+ { $$ = arena->New(context.source_loc(), $2, $4); }
+;
+type_expression:
+ ref_deref_expression
+| fn_type_expression
+;
+minus_expression:
+ // ref_deref_expression excluded due to precedence diamond.
+ MINUS ref_deref_expression
{
$$ = arena->New(
- context.source_loc(), Operator::Or,
- std::vector>({$1, $3}));
+ context.source_loc(), Operator::Neg,
+ std::vector>({$2}));
}
-| NOT expression
+;
+multiplicative_operand:
+ ref_deref_expression
+| minus_expression
+;
+multiplicative_lhs:
+ ref_deref_expression
+| multiplicative_expression
+;
+multiplicative_expression:
+ minus_expression
+| multiplicative_lhs BINARY_STAR multiplicative_operand
{
$$ = arena->New(
- context.source_loc(), Operator::Not,
- std::vector>({$2}));
+ context.source_loc(), Operator::Mul,
+ std::vector>({$1, $3}));
}
-| MINUS expression %prec UNARY_MINUS
+;
+additive_operand:
+ ref_deref_expression
+| multiplicative_expression
+;
+additive_lhs:
+ ref_deref_expression
+| additive_expression
+;
+additive_expression:
+ multiplicative_expression
+| additive_lhs PLUS additive_operand
{
$$ = arena->New(
- context.source_loc(), Operator::Neg,
- std::vector>({$2}));
+ context.source_loc(), Operator::Add,
+ std::vector>({$1, $3}));
}
-| PREFIX_STAR expression
+| additive_lhs MINUS additive_operand
{
$$ = arena->New(
- context.source_loc(), Operator::Deref,
- std::vector>({$2}));
+ context.source_loc(), Operator::Sub,
+ std::vector>({$1, $3}));
}
-| UNARY_STAR expression %prec PREFIX_STAR
+;
+unimpl_expression:
+ // ref_deref_expression excluded due to precedence diamond.
+ ref_deref_expression UNIMPL_EXAMPLE ref_deref_expression
{
- $$ = arena->New(
- context.source_loc(), Operator::Deref,
- std::vector>({$2}));
+ $$ = arena->New(context.source_loc(),
+ "ExampleInfix", $1, $3);
}
-| AMPERSAND expression
+;
+value_expression:
+ // ref_deref_expression excluded due to precedence diamond.
+ additive_expression
+| fn_type_expression
+| unimpl_expression
+;
+comparison_operand:
+ ref_deref_expression
+| value_expression
+;
+comparison_expression:
+ value_expression
+| comparison_operand EQUAL_EQUAL comparison_operand
{
$$ = arena->New(
- context.source_loc(), Operator::AddressOf,
- std::vector>({$2}));
+ context.source_loc(), Operator::Eq,
+ std::vector>({$1, $3}));
}
-| expression tuple
- { $$ = arena->New(context.source_loc(), $1, $2); }
-| expression POSTFIX_STAR
+;
+not_expression:
+ NOT ref_deref_expression
{
$$ = arena->New(
- context.source_loc(), Operator::Ptr,
- std::vector>({$1}));
+ context.source_loc(), Operator::Not,
+ std::vector>({$2}));
}
-| expression UNARY_STAR
+;
+predicate_expression:
+ // ref_deref_expression excluded due to precedence diamond.
+ not_expression
+| comparison_expression
+;
+and_or_operand:
+ ref_deref_expression
+| predicate_expression
+;
+and_lhs:
+ and_or_operand
+| and_expression
+;
+and_expression:
+ // predicate_expression excluded due to precedence diamond.
+ and_lhs AND and_or_operand
{
$$ = arena->New(
- context.source_loc(), Operator::Ptr,
- std::vector>({$1}));
+ context.source_loc(), Operator::And,
+ std::vector>({$1, $3}));
}
-| FN_TYPE tuple ARROW expression
- { $$ = arena->New(context.source_loc(), $2, $4); }
-| expression UNIMPL_EXAMPLE expression
+;
+or_lhs:
+ and_or_operand
+| or_expression
+;
+or_expression:
+ // predicate_expression excluded due to precedence diamond.
+ or_lhs OR and_or_operand
{
- $$ = arena->New(context.source_loc(),
- "ExampleInfix", $1, $3);
+ $$ = arena->New(
+ context.source_loc(), Operator::Or,
+ std::vector>({$1, $3}));
}
;
+expression:
+ ref_deref_expression
+| predicate_expression
+| and_expression
+| or_expression
+;
designator: PERIOD identifier { $$ = $2; }
;
paren_expression: paren_expression_base
@@ -481,7 +565,7 @@ non_expression_pattern:
{ $$ = arena->New(context.source_loc(), $1, $3); }
| paren_pattern
{ $$ = $1; }
-| expression tuple_pattern
+| postfix_expression tuple_pattern
{ $$ = arena->New(context.source_loc(), $1, $2); }
;
binding_lhs:
@@ -643,9 +727,9 @@ nonempty_block:
return_term:
// Empty
{ $$ = ReturnTerm::Omitted(context.source_loc()); }
-| ARROW AUTO %prec FNARROW
+| ARROW AUTO
{ $$ = ReturnTerm::Auto(context.source_loc()); }
-| ARROW expression %prec FNARROW
+| ARROW expression
{ $$ = ReturnTerm::Explicit($2); }
;
generic_binding:
diff --git a/executable_semantics/syntax/unimplemented_example_test.cpp b/executable_semantics/syntax/unimplemented_example_test.cpp
index 3426da22ca69a..7f3e1e3d8f0c9 100644
--- a/executable_semantics/syntax/unimplemented_example_test.cpp
+++ b/executable_semantics/syntax/unimplemented_example_test.cpp
@@ -18,14 +18,14 @@ TEST(UnimplementedExampleTest, VerifyPrecedence) {
static constexpr std::string_view Program = R"(
package ExecutableSemanticsTest api;
fn Main() -> i32 {
- return 1 __unimplemented_example_infix 2 + 3;
+ return 1 __unimplemented_example_infix 2 == 3;
}
)";
Arena arena;
EXPECT_THAT(ParseFromString(&arena, "dummy.carbon", Program, false),
ParsedAs(ASTDeclarations(
ElementsAre(MatchesFunctionDeclaration().WithBody(
- BlockContentsAre(ElementsAre(MatchesReturn(MatchesAdd(
+ BlockContentsAre(ElementsAre(MatchesReturn(MatchesEq(
MatchesUnimplementedExpression(
"ExampleInfix", ElementsAre(MatchesLiteral(1),
MatchesLiteral(2))),
diff --git a/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon b/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon
index 529fcdcd831f7..31f8f6c0943ff 100644
--- a/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon
+++ b/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon
@@ -7,7 +7,7 @@
// RUN: %{not} %{executable_semantics} --trace %s 2>&1 | \
// RUN: %{FileCheck} --match-full-lines --allow-unused-prefixes %s
// AUTOUPDATE: %{executable_semantics} %s
-// CHECK: COMPILATION ERROR: {{.*}}/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon:16: syntax error, unexpected COLON
+// CHECK: COMPILATION ERROR: {{.*}}/executable_semantics/testdata/basic_syntax/fail_missing_var.carbon:16: syntax error, unexpected COLON, expecting EQUAL or SEMICOLON
package ExecutableSemanticsTest api;
diff --git a/executable_semantics/testdata/basic_syntax/not_compare_precedence.carbon b/executable_semantics/testdata/basic_syntax/not_compare_precedence.carbon
new file mode 100644
index 0000000000000..0953c68978d64
--- /dev/null
+++ b/executable_semantics/testdata/basic_syntax/not_compare_precedence.carbon
@@ -0,0 +1,16 @@
+// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
+// Exceptions. See /LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// RUN: %{not} %{executable_semantics} %s 2>&1 | \
+// RUN: %{FileCheck} --match-full-lines --allow-unused-prefixes=false %s
+// RUN: %{not} %{executable_semantics} --trace %s 2>&1 | \
+// RUN: %{FileCheck} --match-full-lines --allow-unused-prefixes %s
+// AUTOUPDATE: %{executable_semantics} %s
+
+package ExecutableSemanticsTest api;
+
+fn CompareBools(a: Bool, b: Bool) -> Bool {
+ // CHECK: COMPILATION ERROR: {{.*}}.carbon:[[@LINE+1]]: syntax error, unexpected EQUAL_EQUAL, expecting SEMICOLON
+ return not a == b;
+}