From 27516a7dbded383de3a669cc7ee58aa2d1313890 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Wed, 27 Mar 2024 12:58:30 +0530 Subject: [PATCH] Implement rules around star expressions with different precedence --- .../src/parser/expression.rs | 150 +++++++++++++++++- crates/ruff_python_parser/src/parser/mod.rs | 2 +- .../src/parser/statement.rs | 29 ++-- 3 files changed, 159 insertions(+), 22 deletions(-) diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs index cdae92c005650..85ee66321716f 100644 --- a/crates/ruff_python_parser/src/parser/expression.rs +++ b/crates/ruff_python_parser/src/parser/expression.rs @@ -83,7 +83,7 @@ impl<'src> Parser<'src> { /// Matches the `expressions` rule in the [Python grammar]. /// /// [Python grammar]: https://docs.python.org/3/reference/grammar.html - pub(super) fn parse_expressions(&mut self) -> ParsedExpr { + pub(super) fn parse_expression_list(&mut self) -> ParsedExpr { let start = self.node_start(); let parsed_expr = self.parse_conditional_expression_or_higher(); @@ -100,6 +100,49 @@ impl<'src> Parser<'src> { } } + /// Parses every Python expression. + /// + /// Matches the `star_expressions` rule in the [Python grammar]. + /// + /// [Python grammar]: https://docs.python.org/3/reference/grammar.html + #[allow(dead_code)] + pub(super) fn parse_star_expression_list(&mut self) -> ParsedExpr { + let start = self.node_start(); + let parsed_expr = self.parse_star_expression_or_higher(AllowNamedExpression::No); + + if self.at(TokenKind::Comma) { + Expr::Tuple(self.parse_tuple_expression( + parsed_expr.expr, + start, + TupleParenthesized::No, + |parser| parser.parse_star_expression_or_higher(AllowNamedExpression::No), + )) + .into() + } else { + parsed_expr + } + } + + /// Parses a star expression or any other expression. + /// + /// Matches either the `star_expression` or `star_named_expression` rule in + /// the [Python grammar] depending on whether named expressions are allowed. + /// + /// [Python grammar]: https://docs.python.org/3/reference/grammar.html + #[allow(dead_code)] + pub(super) fn parse_star_expression_or_higher( + &mut self, + allow_named_expression: AllowNamedExpression, + ) -> ParsedExpr { + if self.at(TokenKind::Star) { + Expr::Starred(self.parse_starred_expression(StarredExpressionPrecedence::BitOr)).into() + } else if allow_named_expression.is_yes() { + self.parse_named_expression_or_higher() + } else { + self.parse_conditional_expression_or_higher() + } + } + /// Parses every Python expression except unparenthesized tuple. /// /// Matches the `named_expression` rule in the [Python grammar]. @@ -263,7 +306,10 @@ impl<'src> Parser<'src> { TokenKind::Plus | TokenKind::Minus | TokenKind::Not | TokenKind::Tilde => { Expr::UnaryOp(self.parse_unary_expression()).into() } - TokenKind::Star => Expr::Starred(self.parse_starred_expression()).into(), + TokenKind::Star => Expr::Starred( + self.parse_starred_expression(StarredExpressionPrecedence::Conditional), + ) + .into(), TokenKind::Await => Expr::Await(self.parse_await_expression()).into(), TokenKind::Lambda => Expr::Lambda(self.parse_lambda_expr()).into(), _ => self.parse_atom(), @@ -276,6 +322,47 @@ impl<'src> Parser<'src> { lhs } + /// Parses a bitwise `or` expression or higher. + /// + /// Matches the `bitwise_or` rule in the [Python grammar]. + /// + /// This method delegates the parsing to [`Parser::parse_expression_with_precedence`] + /// with the minimum binding power of [`Precedence::BitOr`]. The main purpose of this + /// method is to report an error when certain expressions are parsed successfully + /// but are not allowed here. + /// + /// [Python grammar]: https://docs.python.org/3/reference/grammar.html + fn parse_expression_with_bitwise_or_precedence(&mut self) -> ParsedExpr { + let parsed_expr = self.parse_expression_with_precedence(Precedence::BitOr); + + // Parenthesized expressions have a higher precedence than bitwise `or` expressions, + // so they're allowed here. + if parsed_expr.is_parenthesized { + return parsed_expr; + } + + match parsed_expr.expr { + Expr::UnaryOp(ast::ExprUnaryOp { + op: ast::UnaryOp::Not, + .. + }) => { + self.add_error( + ParseErrorType::OtherError("unary `not` expression cannot be used here".into()), + &parsed_expr, + ); + } + Expr::Lambda(_) => { + self.add_error( + ParseErrorType::OtherError("lambda expression cannot be used here".into()), + &parsed_expr, + ); + } + _ => {} + } + + parsed_expr + } + /// Parses a name. /// /// For an invalid name, the `id` field will be an empty string and the `ctx` @@ -1073,7 +1160,7 @@ impl<'src> Parser<'src> { self.bump(TokenKind::Lbrace); - let value = self.parse_expressions(); + let value = self.parse_expression_list(); if !value.is_parenthesized && value.expr.is_lambda_expr() { self.add_error( ParseErrorType::FStringError(FStringErrorType::LambdaWithoutParentheses), @@ -1438,7 +1525,7 @@ impl<'src> Parser<'src> { self.bump(TokenKind::For); let saved_context = self.set_ctx(ParserCtxFlags::FOR_TARGET); - let mut target = self.parse_expressions(); + let mut target = self.parse_expression_list(); self.restore_ctx(ParserCtxFlags::FOR_TARGET, saved_context); helpers::set_expr_ctx(&mut target.expr, ExprContext::Store); @@ -1573,10 +1660,33 @@ impl<'src> Parser<'src> { } } - fn parse_starred_expression(&mut self) -> ast::ExprStarred { + /// Parses a starred expression with the given precedence. + /// + /// If the precedence is bitwise or, the expression is parsed using the `bitwise_or` + /// rule. Otherwise, it's parsed using the `expression` rule. Refer to the + /// [Python grammar] for more information. + /// + /// # Panics + /// + /// If the parser isn't positioned at a `*` token. + /// + /// [Python grammar]: https://docs.python.org/3/reference/grammar.html + #[allow(dead_code)] + fn parse_starred_expression( + &mut self, + precedence: StarredExpressionPrecedence, + ) -> ast::ExprStarred { let start = self.node_start(); self.bump(TokenKind::Star); - let parsed_expr = self.parse_conditional_expression_or_higher(); + + let parsed_expr = match precedence { + StarredExpressionPrecedence::BitOr => { + self.parse_expression_with_bitwise_or_precedence() + } + StarredExpressionPrecedence::Conditional => { + self.parse_conditional_expression_or_higher() + } + }; ast::ExprStarred { value: Box::new(parsed_expr.expr), @@ -1617,7 +1727,7 @@ impl<'src> Parser<'src> { let value = self .at_expr() - .then(|| Box::new(self.parse_expressions().expr)); + .then(|| Box::new(self.parse_expression_list().expr)); Expr::Yield(ast::ExprYield { value, @@ -1625,9 +1735,13 @@ impl<'src> Parser<'src> { }) } + /// Parses a `yield from` expression. + /// /// See: fn parse_yield_from_expression(&mut self, start: TextSize) -> Expr { - let parsed_expr = self.parse_expressions(); + // The grammar rule to use here is `expression` but we're using `expressions` + // for better error recovery. + let parsed_expr = self.parse_expression_list(); match &parsed_expr.expr { Expr::Starred(ast::ExprStarred { value, .. }) => { @@ -1879,3 +1993,23 @@ pub(super) enum GeneratorExpressionInParentheses { expr_start: TextSize, }, } + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(dead_code)] +enum StarredExpressionPrecedence { + BitOr, + Conditional, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(dead_code)] +pub(super) enum AllowNamedExpression { + Yes, + No, +} + +impl AllowNamedExpression { + const fn is_yes(self) -> bool { + matches!(self, AllowNamedExpression::Yes) + } +} diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index b74473a2f8a4b..9406e81b2c9b6 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -177,7 +177,7 @@ impl<'src> Parser<'src> { pub(crate) fn parse_program(mut self) -> Program { let ast = if self.mode == Mode::Expression { let start = self.node_start(); - let parsed_expr = self.parse_expressions(); + let parsed_expr = self.parse_expression_list(); let mut progress = ParserProgress::default(); // TODO: How should error recovery work here? Just truncate after the expression? diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs index 4dc9807a54d32..eb51302ae0d1c 100644 --- a/crates/ruff_python_parser/src/parser/statement.rs +++ b/crates/ruff_python_parser/src/parser/statement.rs @@ -195,7 +195,7 @@ impl<'src> Parser<'src> { } _ => { let start = self.node_start(); - let parsed_expr = self.parse_expressions(); + let parsed_expr = self.parse_expression_list(); if self.eat(TokenKind::Equal) { Stmt::Assign(self.parse_assign_statement(parsed_expr, start)) @@ -271,7 +271,7 @@ impl<'src> Parser<'src> { let value = self .at_expr() - .then(|| Box::new(self.parse_expressions().expr)); + .then(|| Box::new(self.parse_expression_list().expr)); ast::StmtReturn { range: self.node_range(start), @@ -287,7 +287,7 @@ impl<'src> Parser<'src> { let exc = if self.at(TokenKind::Newline) { None } else { - let exc = self.parse_expressions(); + let exc = self.parse_expression_list(); if let Expr::Tuple(node) = &exc.expr { if !node.parenthesized { @@ -304,7 +304,7 @@ impl<'src> Parser<'src> { }; let cause = (exc.is_some() && self.eat(TokenKind::From)).then(|| { - let cause = self.parse_expressions(); + let cause = self.parse_expression_list(); if let Expr::Tuple(ast::ExprTuple { parenthesized: false, @@ -569,13 +569,13 @@ impl<'src> Parser<'src> { /// See: fn parse_assign_statement(&mut self, target: ParsedExpr, start: TextSize) -> ast::StmtAssign { let mut targets = vec![target.expr]; - let mut value = self.parse_expressions(); + let mut value = self.parse_expression_list(); if self.at(TokenKind::Equal) { self.parse_list(RecoveryContextKind::AssignmentTargets, |parser| { parser.bump(TokenKind::Equal); - let mut parsed_expr = parser.parse_expressions(); + let mut parsed_expr = parser.parse_expression_list(); std::mem::swap(&mut value, &mut parsed_expr); @@ -625,7 +625,10 @@ impl<'src> Parser<'src> { helpers::set_expr_ctx(&mut target.expr, ExprContext::Store); let simple = target.expr.is_name_expr() && !target.is_parenthesized; - let annotation = self.parse_expressions(); + + // Annotation is actually just an `expression` but we use `expressions` + // for better error recovery in case tuple isn't parenthesized. + let annotation = self.parse_expression_list(); if matches!( annotation.expr, @@ -642,7 +645,7 @@ impl<'src> Parser<'src> { let value = self .eat(TokenKind::Equal) - .then(|| Box::new(self.parse_expressions().expr)); + .then(|| Box::new(self.parse_expression_list().expr)); ast::StmtAnnAssign { target: Box::new(target.expr), @@ -678,7 +681,7 @@ impl<'src> Parser<'src> { helpers::set_expr_ctx(&mut target.expr, ExprContext::Store); - let value = self.parse_expressions(); + let value = self.parse_expression_list(); ast::StmtAugAssign { target: Box::new(target.expr), @@ -766,7 +769,7 @@ impl<'src> Parser<'src> { let type_ = if p.at(TokenKind::Colon) && !is_star { None } else { - let parsed_expr = p.parse_expressions(); + let parsed_expr = p.parse_expression_list(); if matches!( parsed_expr.expr, Expr::Tuple(ast::ExprTuple { @@ -839,14 +842,14 @@ impl<'src> Parser<'src> { self.bump(TokenKind::For); let saved_context = self.set_ctx(ParserCtxFlags::FOR_TARGET); - let mut target = self.parse_expressions(); + let mut target = self.parse_expression_list(); self.restore_ctx(ParserCtxFlags::FOR_TARGET, saved_context); helpers::set_expr_ctx(&mut target.expr, ExprContext::Store); self.expect(TokenKind::In); - let iter = self.parse_expressions(); + let iter = self.parse_expression_list(); self.expect(TokenKind::Colon); @@ -911,7 +914,7 @@ impl<'src> Parser<'src> { parameters.range = self.node_range(parameters_start); let returns = self.eat(TokenKind::Rarrow).then(|| { - let returns = self.parse_expressions(); + let returns = self.parse_expression_list(); if !returns.is_parenthesized && matches!(returns.expr, Expr::Tuple(_)) { self.add_error( ParseErrorType::OtherError(