From 4ae55048390812a2b3b3da847ae604bca59cb31a Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Fri, 29 Mar 2024 13:18:16 +0530 Subject: [PATCH] Implement rules around star expressions with different precedence (#10623) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary This PR adds implementations around the following grammar rules to support the other expression and statement parsing. This PR doesn't update the related expressions and statements as they will be done in their own PRs. * `star_expressions` * `star_expression` * `'*' bitwise_or` ### Problem There are two grammar entries around star expressions: ``` star_expression: | '*' bitwise_or | expression starred_expression: '*' expression ``` The main difference is the precedence at which the expression should be parsed if a `*` token is encountered. Currently, we parse both rules at the same precedence which makes the parser more lenient. There are various expressions and statements which uses either one of the above rules. ### Solutions #### Current solution Add the following new methods to the parser: 1. `parse_star_expression_list` which would be similar to `parse_expression_list` and would match the `star_expressions` grammar rule 2. `parse_star_expression_or_higher` which would match the `star_expression` or `star_named_expression` grammar rule 3. `parse_expression_with_bitwise_or_precedence` which would match the `bitwise_or` grammar rule Here, we could possibly merge the `parse_expression_list` and `parse_star_expression_list` into a single function as it only differs in the method it delegates the parsing to. The parameter could be called `allow_star_expression`. It could be confusing because even if it’s false, we would allow starred expression but at a higher binding power. This solution closely resembles the actual grammar and is easier to follow in the parser code. It doesn't require a lot of verification by the caller as the precedence parser makes sure to not allow that. #### Other solution Add a new `verify_bitwise_or_precedence` method which reports an error if the grammar expected a starred expression with `bitwise_or` precedence: ```rs pub(super) fn verify_star_expressions( &mut self, expr: &Expr, allow_named_expression: AllowNamedExpression, ) { match expr { Expr::Tuple(ast::ExprTuple { elts, .. }) => { for expr in elts { self.verify_star_expressions(expr, allow_named_expression); } } Expr::Starred(ast::ExprStarred { value, .. }) => { let expr_kind = match &**value { Expr::Compare(_) => "comparison", Expr::UnaryOp(ast::ExprUnaryOp { op: ast::UnaryOp::Not, .. }) => "unary `not`", Expr::BoolOp(_) => "boolean", Expr::If(_) => "`if-else`", Expr::Lambda(_) => "lambda", Expr::Named(_) if allow_named_expression.is_no() => "named", _ => return, }; self.add_error( ParseErrorType::OtherError(format!( "{expr_kind} expression cannot be used here", )), expr.range(), ); } _ => {} } } ``` For this solution, the parser will need to verify the precedence of a starred expression wherever grammar expected the `star_expressions`, `star_expression` or `star_named_expression` rule. We could possibly adopt this solution for better error recovery but I think for now having a strict parser is better suited because we don't really know what complete error recovery looks like. ### Follow-up There's a related follow-up issue which I'm working on and is related to starred expressions. The starred expression is only allowed in certain expressions and context which the parser needs to make sure and report an error if found otherwise. ## Test Plan Tested this locally by changing various expression and statement to use these methods. As mentioned they will be in their own PR. --- .../src/parser/expression.rs | 150 +++++++++++++++++- crates/ruff_python_parser/src/parser/mod.rs | 2 +- .../src/parser/statement.rs | 29 ++-- 3 files changed, 159 insertions(+), 22 deletions(-) diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index cdae92c0056503..85ee66321716fa 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -83,7 +83,7 @@ impl<'src> Parser<'src> { /// Matches the `expressions` rule in the [Python grammar]. /// /// [Python grammar]: https://docs.python.org/3/reference/grammar.html - pub(super) fn parse_expressions(&mut self) -> ParsedExpr { + pub(super) fn parse_expression_list(&mut self) -> ParsedExpr { let start = self.node_start(); let parsed_expr = self.parse_conditional_expression_or_higher(); @@ -100,6 +100,49 @@ impl<'src> Parser<'src> { } } + /// Parses every Python expression. + /// + /// Matches the `star_expressions` rule in the [Python grammar]. + /// + /// [Python grammar]: https://docs.python.org/3/reference/grammar.html + #[allow(dead_code)] + pub(super) fn parse_star_expression_list(&mut self) -> ParsedExpr { + let start = self.node_start(); + let parsed_expr = self.parse_star_expression_or_higher(AllowNamedExpression::No); + + if self.at(TokenKind::Comma) { + Expr::Tuple(self.parse_tuple_expression( + parsed_expr.expr, + start, + TupleParenthesized::No, + |parser| parser.parse_star_expression_or_higher(AllowNamedExpression::No), + )) + .into() + } else { + parsed_expr + } + } + + /// Parses a star expression or any other expression. + /// + /// Matches either the `star_expression` or `star_named_expression` rule in + /// the [Python grammar] depending on whether named expressions are allowed. + /// + /// [Python grammar]: https://docs.python.org/3/reference/grammar.html + #[allow(dead_code)] + pub(super) fn parse_star_expression_or_higher( + &mut self, + allow_named_expression: AllowNamedExpression, + ) -> ParsedExpr { + if self.at(TokenKind::Star) { + Expr::Starred(self.parse_starred_expression(StarredExpressionPrecedence::BitOr)).into() + } else if allow_named_expression.is_yes() { + self.parse_named_expression_or_higher() + } else { + self.parse_conditional_expression_or_higher() + } + } + /// Parses every Python expression except unparenthesized tuple. /// /// Matches the `named_expression` rule in the [Python grammar]. @@ -263,7 +306,10 @@ impl<'src> Parser<'src> { TokenKind::Plus | TokenKind::Minus | TokenKind::Not | TokenKind::Tilde => { Expr::UnaryOp(self.parse_unary_expression()).into() } - TokenKind::Star => Expr::Starred(self.parse_starred_expression()).into(), + TokenKind::Star => Expr::Starred( + self.parse_starred_expression(StarredExpressionPrecedence::Conditional), + ) + .into(), TokenKind::Await => Expr::Await(self.parse_await_expression()).into(), TokenKind::Lambda => Expr::Lambda(self.parse_lambda_expr()).into(), _ => self.parse_atom(), @@ -276,6 +322,47 @@ impl<'src> Parser<'src> { lhs } + /// Parses a bitwise `or` expression or higher. + /// + /// Matches the `bitwise_or` rule in the [Python grammar]. + /// + /// This method delegates the parsing to [`Parser::parse_expression_with_precedence`] + /// with the minimum binding power of [`Precedence::BitOr`]. The main purpose of this + /// method is to report an error when certain expressions are parsed successfully + /// but are not allowed here. + /// + /// [Python grammar]: https://docs.python.org/3/reference/grammar.html + fn parse_expression_with_bitwise_or_precedence(&mut self) -> ParsedExpr { + let parsed_expr = self.parse_expression_with_precedence(Precedence::BitOr); + + // Parenthesized expressions have a higher precedence than bitwise `or` expressions, + // so they're allowed here. + if parsed_expr.is_parenthesized { + return parsed_expr; + } + + match parsed_expr.expr { + Expr::UnaryOp(ast::ExprUnaryOp { + op: ast::UnaryOp::Not, + .. + }) => { + self.add_error( + ParseErrorType::OtherError("unary `not` expression cannot be used here".into()), + &parsed_expr, + ); + } + Expr::Lambda(_) => { + self.add_error( + ParseErrorType::OtherError("lambda expression cannot be used here".into()), + &parsed_expr, + ); + } + _ => {} + } + + parsed_expr + } + /// Parses a name. /// /// For an invalid name, the `id` field will be an empty string and the `ctx` @@ -1073,7 +1160,7 @@ impl<'src> Parser<'src> { self.bump(TokenKind::Lbrace); - let value = self.parse_expressions(); + let value = self.parse_expression_list(); if !value.is_parenthesized && value.expr.is_lambda_expr() { self.add_error( ParseErrorType::FStringError(FStringErrorType::LambdaWithoutParentheses), @@ -1438,7 +1525,7 @@ impl<'src> Parser<'src> { self.bump(TokenKind::For); let saved_context = self.set_ctx(ParserCtxFlags::FOR_TARGET); - let mut target = self.parse_expressions(); + let mut target = self.parse_expression_list(); self.restore_ctx(ParserCtxFlags::FOR_TARGET, saved_context); helpers::set_expr_ctx(&mut target.expr, ExprContext::Store); @@ -1573,10 +1660,33 @@ impl<'src> Parser<'src> { } } - fn parse_starred_expression(&mut self) -> ast::ExprStarred { + /// Parses a starred expression with the given precedence. + /// + /// If the precedence is bitwise or, the expression is parsed using the `bitwise_or` + /// rule. Otherwise, it's parsed using the `expression` rule. Refer to the + /// [Python grammar] for more information. + /// + /// # Panics + /// + /// If the parser isn't positioned at a `*` token. + /// + /// [Python grammar]: https://docs.python.org/3/reference/grammar.html + #[allow(dead_code)] + fn parse_starred_expression( + &mut self, + precedence: StarredExpressionPrecedence, + ) -> ast::ExprStarred { let start = self.node_start(); self.bump(TokenKind::Star); - let parsed_expr = self.parse_conditional_expression_or_higher(); + + let parsed_expr = match precedence { + StarredExpressionPrecedence::BitOr => { + self.parse_expression_with_bitwise_or_precedence() + } + StarredExpressionPrecedence::Conditional => { + self.parse_conditional_expression_or_higher() + } + }; ast::ExprStarred { value: Box::new(parsed_expr.expr), @@ -1617,7 +1727,7 @@ impl<'src> Parser<'src> { let value = self .at_expr() - .then(|| Box::new(self.parse_expressions().expr)); + .then(|| Box::new(self.parse_expression_list().expr)); Expr::Yield(ast::ExprYield { value, @@ -1625,9 +1735,13 @@ impl<'src> Parser<'src> { }) } + /// Parses a `yield from` expression. + /// /// See: fn parse_yield_from_expression(&mut self, start: TextSize) -> Expr { - let parsed_expr = self.parse_expressions(); + // The grammar rule to use here is `expression` but we're using `expressions` + // for better error recovery. + let parsed_expr = self.parse_expression_list(); match &parsed_expr.expr { Expr::Starred(ast::ExprStarred { value, .. }) => { @@ -1879,3 +1993,23 @@ pub(super) enum GeneratorExpressionInParentheses { expr_start: TextSize, }, } + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(dead_code)] +enum StarredExpressionPrecedence { + BitOr, + Conditional, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(dead_code)] +pub(super) enum AllowNamedExpression { + Yes, + No, +} + +impl AllowNamedExpression { + const fn is_yes(self) -> bool { + matches!(self, AllowNamedExpression::Yes) + } +} diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index b74473a2f8a4bc..9406e81b2c9b60 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -177,7 +177,7 @@ impl<'src> Parser<'src> { pub(crate) fn parse_program(mut self) -> Program { let ast = if self.mode == Mode::Expression { let start = self.node_start(); - let parsed_expr = self.parse_expressions(); + let parsed_expr = self.parse_expression_list(); let mut progress = ParserProgress::default(); // TODO: How should error recovery work here? Just truncate after the expression? diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs index 4dc9807a54d325..eb51302ae0d1ce 100644 --- a/crates/ruff_python_parser/src/parser/statement.rs +++ b/crates/ruff_python_parser/src/parser/statement.rs @@ -195,7 +195,7 @@ impl<'src> Parser<'src> { } _ => { let start = self.node_start(); - let parsed_expr = self.parse_expressions(); + let parsed_expr = self.parse_expression_list(); if self.eat(TokenKind::Equal) { Stmt::Assign(self.parse_assign_statement(parsed_expr, start)) @@ -271,7 +271,7 @@ impl<'src> Parser<'src> { let value = self .at_expr() - .then(|| Box::new(self.parse_expressions().expr)); + .then(|| Box::new(self.parse_expression_list().expr)); ast::StmtReturn { range: self.node_range(start), @@ -287,7 +287,7 @@ impl<'src> Parser<'src> { let exc = if self.at(TokenKind::Newline) { None } else { - let exc = self.parse_expressions(); + let exc = self.parse_expression_list(); if let Expr::Tuple(node) = &exc.expr { if !node.parenthesized { @@ -304,7 +304,7 @@ impl<'src> Parser<'src> { }; let cause = (exc.is_some() && self.eat(TokenKind::From)).then(|| { - let cause = self.parse_expressions(); + let cause = self.parse_expression_list(); if let Expr::Tuple(ast::ExprTuple { parenthesized: false, @@ -569,13 +569,13 @@ impl<'src> Parser<'src> { /// See: fn parse_assign_statement(&mut self, target: ParsedExpr, start: TextSize) -> ast::StmtAssign { let mut targets = vec![target.expr]; - let mut value = self.parse_expressions(); + let mut value = self.parse_expression_list(); if self.at(TokenKind::Equal) { self.parse_list(RecoveryContextKind::AssignmentTargets, |parser| { parser.bump(TokenKind::Equal); - let mut parsed_expr = parser.parse_expressions(); + let mut parsed_expr = parser.parse_expression_list(); std::mem::swap(&mut value, &mut parsed_expr); @@ -625,7 +625,10 @@ impl<'src> Parser<'src> { helpers::set_expr_ctx(&mut target.expr, ExprContext::Store); let simple = target.expr.is_name_expr() && !target.is_parenthesized; - let annotation = self.parse_expressions(); + + // Annotation is actually just an `expression` but we use `expressions` + // for better error recovery in case tuple isn't parenthesized. + let annotation = self.parse_expression_list(); if matches!( annotation.expr, @@ -642,7 +645,7 @@ impl<'src> Parser<'src> { let value = self .eat(TokenKind::Equal) - .then(|| Box::new(self.parse_expressions().expr)); + .then(|| Box::new(self.parse_expression_list().expr)); ast::StmtAnnAssign { target: Box::new(target.expr), @@ -678,7 +681,7 @@ impl<'src> Parser<'src> { helpers::set_expr_ctx(&mut target.expr, ExprContext::Store); - let value = self.parse_expressions(); + let value = self.parse_expression_list(); ast::StmtAugAssign { target: Box::new(target.expr), @@ -766,7 +769,7 @@ impl<'src> Parser<'src> { let type_ = if p.at(TokenKind::Colon) && !is_star { None } else { - let parsed_expr = p.parse_expressions(); + let parsed_expr = p.parse_expression_list(); if matches!( parsed_expr.expr, Expr::Tuple(ast::ExprTuple { @@ -839,14 +842,14 @@ impl<'src> Parser<'src> { self.bump(TokenKind::For); let saved_context = self.set_ctx(ParserCtxFlags::FOR_TARGET); - let mut target = self.parse_expressions(); + let mut target = self.parse_expression_list(); self.restore_ctx(ParserCtxFlags::FOR_TARGET, saved_context); helpers::set_expr_ctx(&mut target.expr, ExprContext::Store); self.expect(TokenKind::In); - let iter = self.parse_expressions(); + let iter = self.parse_expression_list(); self.expect(TokenKind::Colon); @@ -911,7 +914,7 @@ impl<'src> Parser<'src> { parameters.range = self.node_range(parameters_start); let returns = self.eat(TokenKind::Rarrow).then(|| { - let returns = self.parse_expressions(); + let returns = self.parse_expression_list(); if !returns.is_parenthesized && matches!(returns.expr, Expr::Tuple(_)) { self.add_error( ParseErrorType::OtherError(