From 17ceb5dcb35ff9c0f84c32133fdb94b06c69164a Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 25 Sep 2023 10:21:44 -0400 Subject: [PATCH] Preserve newlines after nested compound statements (#7608) ## Summary Given: ```python if True: if True: pass else: pass # a # b # c else: pass ``` We want to preserve the newline after the `# c` (before the `else`). However, the `last_node` ends at the `pass`, and the comments are trailing comments on the `pass`, not trailing comments on the `last_node` (the `if`). As such, when counting the trailing newlines on the outer `if`, we abort as soon as we see the comment (`# a`). This PR changes the logic to skip _all_ comments (even those with newlines between them). This is safe as we know that there are no "leading" comments on the `else`, so there's no risk of skipping those accidentally. Closes https://github.com/astral-sh/ruff/issues/7602. ## Test Plan No change in compatibility. Before: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 319 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99979 | 3496 | 22 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 | After: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 319 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99983 | 3496 | 18 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 | --- .../test/fixtures/ruff/statement/if.py | 25 +++++++++ .../test/fixtures/ruff/statement/import.py | 11 ++++ .../src/comments/format.rs | 38 +++++++++---- .../src/statement/stmt_class_def.rs | 18 ++++--- .../src/statement/suite.rs | 33 +++++------- .../snapshots/format@statement__if.py.snap | 53 ++++++++++++++++++- .../format@statement__import.py.snap | 21 ++++++++ crates/ruff_python_trivia/src/tokenizer.rs | 25 ++++++++- 8 files changed, 182 insertions(+), 42 deletions(-) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py index 553b1db45904c..8859b1ad20595 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py @@ -207,6 +207,31 @@ def f(): else: pass +if True: + if True: + pass + else: + pass + # a + + # b + # c + +else: + pass + +if True: + if True: + pass + else: + pass + + # b + # c + +else: + pass + # Regression test for: https://github.com/astral-sh/ruff/issues/7602 if True: diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/import.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/import.py index 04bb263eaf9c2..7a172955b928e 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/import.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/import.py @@ -65,3 +65,14 @@ def func(): logger = logging.getLogger("FastProject") + +# Regression test for: https://github.com/astral-sh/ruff/issues/7604 +import os +# comment + +# comment + + +# comment +x = 1 + diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index a2dbfd601a16e..f2ab3c9b7c775 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -3,7 +3,7 @@ use std::borrow::Cow; use ruff_formatter::{format_args, write, FormatError, FormatOptions, SourceCode}; use ruff_python_ast::node::{AnyNodeRef, AstNode}; use ruff_python_ast::PySourceType; -use ruff_python_trivia::{lines_after, lines_before}; +use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before}; use ruff_text_size::{Ranged, TextLen, TextRange}; use crate::comments::{CommentLinePosition, SourceComment}; @@ -96,16 +96,32 @@ impl Format> for FormatLeadingAlternateBranchComments<'_> { write!(f, [leading_comments(self.comments)])?; } else if let Some(last_preceding) = self.last_node { - // The leading comments formatting ensures that it preserves the right amount of lines after - // We need to take care of this ourselves, if there's no leading `else` comment. - let end = if let Some(last_trailing) = - f.context().comments().trailing(last_preceding).last() - { - last_trailing.end() - } else { - last_preceding.end() - }; - write!(f, [empty_lines(lines_after(end, f.context().source()))])?; + // The leading comments formatting ensures that it preserves the right amount of lines + // after We need to take care of this ourselves, if there's no leading `else` comment. + // Since the `last_node` could be a compound node, we need to skip _all_ trivia. + // + // For example, here, when formatting the `if` statement, the `last_node` (the `while`) + // would end at the end of `pass`, but we want to skip _all_ comments: + // ```python + // if True: + // while True: + // pass + // # comment + // + // # comment + // else: + // ... + // ``` + // + // `lines_after_ignoring_trivia` is safe here, as we _know_ that the `else` doesn't + // have any leading comments. + write!( + f, + [empty_lines(lines_after_ignoring_trivia( + last_preceding.end(), + f.context().source() + ))] + )?; } Ok(()) diff --git a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs index 231320eac75f4..4dcf95edcb489 100644 --- a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs +++ b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs @@ -1,6 +1,6 @@ use ruff_formatter::write; use ruff_python_ast::{Decorator, StmtClassDef}; -use ruff_python_trivia::lines_after_ignoring_trivia; +use ruff_python_trivia::lines_after_ignoring_end_of_line_trivia; use ruff_text_size::Ranged; use crate::comments::format::empty_lines_before_trailing_comments; @@ -158,13 +158,15 @@ impl Format> for FormatDecorators<'_> { // Write any leading definition comments (between last decorator and the header) // while maintaining the right amount of empty lines between the comment // and the last decorator. - let leading_line = - if lines_after_ignoring_trivia(last_decorator.end(), f.context().source()) <= 1 - { - hard_line_break() - } else { - empty_line() - }; + let leading_line = if lines_after_ignoring_end_of_line_trivia( + last_decorator.end(), + f.context().source(), + ) <= 1 + { + hard_line_break() + } else { + empty_line() + }; write!( f, diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index eae1d849e21eb..9575f78d86a77 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -2,7 +2,7 @@ use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWi use ruff_python_ast::helpers::is_compound_statement; use ruff_python_ast::node::AnyNodeRef; use ruff_python_ast::{self as ast, Constant, Expr, ExprConstant, PySourceType, Stmt, Suite}; -use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before}; +use ruff_python_trivia::{lines_after, lines_after_ignoring_end_of_line_trivia, lines_before}; use ruff_text_size::{Ranged, TextRange}; use crate::comments::{ @@ -274,29 +274,20 @@ impl FormatRule> for FormatSuite { // * [`NodeLevel::CompoundStatement`]: Up to one empty line // * [`NodeLevel::Expression`]: No empty lines - let count_lines = |offset| { - // It's necessary to skip any trailing line comment because RustPython doesn't include trailing comments - // in the node's range - // ```python - // a # The range of `a` ends right before this comment - // - // b - // ``` - // - // Simply using `lines_after` doesn't work if a statement has a trailing comment because - // it then counts the lines between the statement and the trailing comment, which is - // always 0. This is why it skips any trailing trivia (trivia that's on the same line) - // and counts the lines after. - lines_after(offset, source) - }; - + // It's necessary to skip any trailing line comment because our parser doesn't + // include trailing comments in the node's range: + // ```python + // a # The range of `a` ends right before this comment + // + // b + // ``` let end = preceding_comments .trailing .last() .map_or(preceding.end(), |comment| comment.slice().end()); match node_level { - NodeLevel::TopLevel => match count_lines(end) { + NodeLevel::TopLevel => match lines_after(end, source) { 0 | 1 => hard_line_break().fmt(f)?, 2 => empty_line().fmt(f)?, _ => match source_type { @@ -308,7 +299,7 @@ impl FormatRule> for FormatSuite { } }, }, - NodeLevel::CompoundStatement => match count_lines(end) { + NodeLevel::CompoundStatement => match lines_after(end, source) { 0 | 1 => hard_line_break().fmt(f)?, _ => empty_line().fmt(f)?, }, @@ -379,7 +370,9 @@ fn stub_file_empty_lines( } } SuiteKind::Class | SuiteKind::Other | SuiteKind::Function => { - if empty_line_condition && lines_after_ignoring_trivia(preceding.end(), source) > 1 { + if empty_line_condition + && lines_after_ignoring_end_of_line_trivia(preceding.end(), source) > 1 + { empty_line().fmt(f) } else { hard_line_break().fmt(f) diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap index 3b4aac308edf4..3795d7f03922f 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap @@ -213,6 +213,31 @@ if True: else: pass +if True: + if True: + pass + else: + pass + # a + + # b + # c + +else: + pass + +if True: + if True: + pass + else: + pass + + # b + # c + +else: + pass + # Regression test for: https://github.com/astral-sh/ruff/issues/7602 if True: @@ -493,6 +518,31 @@ if True: else: pass +if True: + if True: + pass + else: + pass + # a + + # b + # c + +else: + pass + +if True: + if True: + pass + else: + pass + + # b + # c + +else: + pass + # Regression test for: https://github.com/astral-sh/ruff/issues/7602 if True: @@ -503,7 +553,6 @@ if True: # a # b # c - else: pass @@ -515,6 +564,7 @@ if True: # a # c + else: pass @@ -528,7 +578,6 @@ if True: # a # b # c - else: pass diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__import.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__import.py.snap index dd348f8a33967..7f993f5b49fd8 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@statement__import.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@statement__import.py.snap @@ -71,6 +71,17 @@ import os logger = logging.getLogger("FastProject") + +# Regression test for: https://github.com/astral-sh/ruff/issues/7604 +import os +# comment + +# comment + + +# comment +x = 1 + ``` ## Output @@ -149,6 +160,16 @@ import os logger = logging.getLogger("FastProject") + +# Regression test for: https://github.com/astral-sh/ruff/issues/7604 +import os +# comment + +# comment + + +# comment +x = 1 ``` diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs index 060c673fc0e1e..b0bf36ded77ba 100644 --- a/crates/ruff_python_trivia/src/tokenizer.rs +++ b/crates/ruff_python_trivia/src/tokenizer.rs @@ -88,10 +88,33 @@ pub fn lines_after(offset: TextSize, code: &str) -> u32 { newlines } +/// Counts the empty lines after `offset`, ignoring any trailing trivia: end-of-line comments, +/// own-line comments, and any intermediary newlines. +pub fn lines_after_ignoring_trivia(offset: TextSize, code: &str) -> u32 { + let mut newlines = 0u32; + for token in SimpleTokenizer::starts_at(offset, code) { + match token.kind() { + SimpleTokenKind::Newline => { + newlines += 1; + } + SimpleTokenKind::Whitespace => {} + // If we see a comment, reset the newlines counter. + SimpleTokenKind::Comment => { + newlines = 0; + } + // As soon as we see a non-trivia token, we're done. + _ => { + break; + } + } + } + newlines +} + /// Counts the empty lines after `offset`, ignoring any trailing trivia on the same line as /// `offset`. #[allow(clippy::cast_possible_truncation)] -pub fn lines_after_ignoring_trivia(offset: TextSize, code: &str) -> u32 { +pub fn lines_after_ignoring_end_of_line_trivia(offset: TextSize, code: &str) -> u32 { // SAFETY: We don't support files greater than 4GB, so casting to u32 is safe. SimpleTokenizer::starts_at(offset, code) .skip_while(|token| token.kind != SimpleTokenKind::Newline && token.kind.is_trivia())