Preserve newlines after nested compound statements (#7608)

## Summary Given: ```python if True: if True: pass else: pass # a # b # c else: pass ``` We want to preserve the newline after the `# c` (before the `else`). However, the `last_node` ends at the `pass`, and the comments are trailing comments on the `pass`, not trailing comments on the `last_node` (the `if`). As such, when counting the trailing newlines on the outer `if`, we abort as soon as we see the comment (`# a`). This PR changes the logic to skip _all_ comments (even those with newlines between them). This is safe as we know that there are no "leading" comments on the `else`, so there's no risk of skipping those accidentally. Closes #7602. ## Test Plan No change in compatibility. Before: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 319 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99979 | 3496 | 22 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 | After: | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 319 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99983 | 3496 | 18 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 |
astral-sh · Sep 25, 2023 · 17ceb5d · 17ceb5d
1 parent 8ce1387
commit 17ceb5d
Show file tree

Hide file tree

Showing 8 changed files with 182 additions and 42 deletions.
diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/if.py
@@ -207,6 +207,31 @@ def f():
 else:
     pass
 
+if True:
+    if True:
+        pass
+    else:
+        pass
+        # a
+
+        # b
+        # c
+
+else:
+    pass
+
+if True:
+    if True:
+        pass
+    else:
+        pass
+
+        # b
+        # c
+
+else:
+    pass
+
 
 # Regression test for: https://github.com/astral-sh/ruff/issues/7602
 if True:

diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/import.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/statement/import.py
@@ -65,3 +65,14 @@ def func():
 
 
 logger = logging.getLogger("FastProject")
+
+# Regression test for: https://github.com/astral-sh/ruff/issues/7604
+import os
+# comment
+
+# comment
+
+
+# comment
+x = 1
+
diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs
@@ -3,7 +3,7 @@ use std::borrow::Cow;
 use ruff_formatter::{format_args, write, FormatError, FormatOptions, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_ast::PySourceType;
-use ruff_python_trivia::{lines_after, lines_before};
+use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before};
 use ruff_text_size::{Ranged, TextLen, TextRange};
 
 use crate::comments::{CommentLinePosition, SourceComment};
@@ -96,16 +96,32 @@ impl Format<PyFormatContext<'_>> for FormatLeadingAlternateBranchComments<'_> {
 
             write!(f, [leading_comments(self.comments)])?;
         } else if let Some(last_preceding) = self.last_node {
-            // The leading comments formatting ensures that it preserves the right amount of lines after
-            // We need to take care of this ourselves, if there's no leading `else` comment.
-            let end = if let Some(last_trailing) =
-                f.context().comments().trailing(last_preceding).last()
-            {
-                last_trailing.end()
-            } else {
-                last_preceding.end()
-            };
-            write!(f, [empty_lines(lines_after(end, f.context().source()))])?;
+            // The leading comments formatting ensures that it preserves the right amount of lines
+            // after We need to take care of this ourselves, if there's no leading `else` comment.
+            // Since the `last_node` could be a compound node, we need to skip _all_ trivia.
+            //
+            // For example, here, when formatting the `if` statement, the `last_node` (the `while`)
+            // would end at the end of `pass`, but we want to skip _all_ comments:
+            // ```python
+            // if True:
+            //     while True:
+            //         pass
+            //         # comment
+            //
+            //     # comment
+            // else:
+            //     ...
+            // ```
+            //
+            // `lines_after_ignoring_trivia` is safe here, as we _know_ that the `else` doesn't
+            // have any leading comments.
+            write!(
+                f,
+                [empty_lines(lines_after_ignoring_trivia(
+                    last_preceding.end(),
+                    f.context().source()
+                ))]
+            )?;
         }
 
         Ok(())

diff --git a/crates/ruff_python_formatter/src/statement/stmt_class_def.rs b/crates/ruff_python_formatter/src/statement/stmt_class_def.rs
@@ -1,6 +1,6 @@
 use ruff_formatter::write;
 use ruff_python_ast::{Decorator, StmtClassDef};
-use ruff_python_trivia::lines_after_ignoring_trivia;
+use ruff_python_trivia::lines_after_ignoring_end_of_line_trivia;
 use ruff_text_size::Ranged;
 
 use crate::comments::format::empty_lines_before_trailing_comments;
@@ -158,13 +158,15 @@ impl Format<PyFormatContext<'_>> for FormatDecorators<'_> {
                 // Write any leading definition comments (between last decorator and the header)
                 // while maintaining the right amount of empty lines between the comment
                 // and the last decorator.
-                let leading_line =
-                    if lines_after_ignoring_trivia(last_decorator.end(), f.context().source()) <= 1
-                    {
-                        hard_line_break()
-                    } else {
-                        empty_line()
-                    };
+                let leading_line = if lines_after_ignoring_end_of_line_trivia(
+                    last_decorator.end(),
+                    f.context().source(),
+                ) <= 1
+                {
+                    hard_line_break()
+                } else {
+                    empty_line()
+                };
 
                 write!(
                     f,

diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs
@@ -2,7 +2,7 @@ use ruff_formatter::{write, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWi
 use ruff_python_ast::helpers::is_compound_statement;
 use ruff_python_ast::node::AnyNodeRef;
 use ruff_python_ast::{self as ast, Constant, Expr, ExprConstant, PySourceType, Stmt, Suite};
-use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before};
+use ruff_python_trivia::{lines_after, lines_after_ignoring_end_of_line_trivia, lines_before};
 use ruff_text_size::{Ranged, TextRange};
 
 use crate::comments::{
@@ -274,29 +274,20 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
                 // * [`NodeLevel::CompoundStatement`]: Up to one empty line
                 // * [`NodeLevel::Expression`]: No empty lines
 
-                let count_lines = |offset| {
-                    // It's necessary to skip any trailing line comment because RustPython doesn't include trailing comments
-                    // in the node's range
-                    // ```python
-                    // a # The range of `a` ends right before this comment
-                    //
-                    // b
-                    // ```
-                    //
-                    // Simply using `lines_after` doesn't work if a statement has a trailing comment because
-                    // it then counts the lines between the statement and the trailing comment, which is
-                    // always 0. This is why it skips any trailing trivia (trivia that's on the same line)
-                    // and counts the lines after.
-                    lines_after(offset, source)
-                };
-
+                // It's necessary to skip any trailing line comment because our parser doesn't
+                // include trailing comments in the node's range:
+                // ```python
+                // a # The range of `a` ends right before this comment
+                //
+                // b
+                // ```
                 let end = preceding_comments
                     .trailing
                     .last()
                     .map_or(preceding.end(), |comment| comment.slice().end());
 
                 match node_level {
-                    NodeLevel::TopLevel => match count_lines(end) {
+                    NodeLevel::TopLevel => match lines_after(end, source) {
                         0 | 1 => hard_line_break().fmt(f)?,
                         2 => empty_line().fmt(f)?,
                         _ => match source_type {
@@ -308,7 +299,7 @@ impl FormatRule<Suite, PyFormatContext<'_>> for FormatSuite {
                             }
                         },
                     },
-                    NodeLevel::CompoundStatement => match count_lines(end) {
+                    NodeLevel::CompoundStatement => match lines_after(end, source) {
                         0 | 1 => hard_line_break().fmt(f)?,
                         _ => empty_line().fmt(f)?,
                     },
@@ -379,7 +370,9 @@ fn stub_file_empty_lines(
             }
         }
         SuiteKind::Class | SuiteKind::Other | SuiteKind::Function => {
-            if empty_line_condition && lines_after_ignoring_trivia(preceding.end(), source) > 1 {
+            if empty_line_condition
+                && lines_after_ignoring_end_of_line_trivia(preceding.end(), source) > 1
+            {
                 empty_line().fmt(f)
             } else {
                 hard_line_break().fmt(f)

diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__if.py.snap
@@ -213,6 +213,31 @@ if True:
 else:
     pass
 
+if True:
+    if True:
+        pass
+    else:
+        pass
+        # a
+
+        # b
+        # c
+
+else:
+    pass
+
+if True:
+    if True:
+        pass
+    else:
+        pass
+
+        # b
+        # c
+
+else:
+    pass
+
 
 # Regression test for: https://github.com/astral-sh/ruff/issues/7602
 if True:
@@ -493,6 +518,31 @@ if True:
 else:
     pass
 
+if True:
+    if True:
+        pass
+    else:
+        pass
+        # a
+
+        # b
+        # c
+
+else:
+    pass
+
+if True:
+    if True:
+        pass
+    else:
+        pass
+
+        # b
+        # c
+
+else:
+    pass
+
 
 # Regression test for: https://github.com/astral-sh/ruff/issues/7602
 if True:
@@ -503,7 +553,6 @@ if True:
         # a
         # b
         # c
-
 else:
     pass
 
@@ -515,6 +564,7 @@ if True:
 
         # a
         # c
+
 else:
     pass
 
@@ -528,7 +578,6 @@ if True:
         # a
         # b
         # c
-
 else:
     pass
 

diff --git a/crates/ruff_python_formatter/tests/snapshots/format@statement__import.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@statement__import.py.snap
@@ -71,6 +71,17 @@ import os
 
 
 logger = logging.getLogger("FastProject")
+
+# Regression test for: https://github.com/astral-sh/ruff/issues/7604
+import os
+# comment
+
+# comment
+
+
+# comment
+x = 1
+
 ```
 
 ## Output
@@ -149,6 +160,16 @@ import os
 
 
 logger = logging.getLogger("FastProject")
+
+# Regression test for: https://github.com/astral-sh/ruff/issues/7604
+import os
+# comment
+
+# comment
+
+
+# comment
+x = 1
 ```
 
 

diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs
@@ -88,10 +88,33 @@ pub fn lines_after(offset: TextSize, code: &str) -> u32 {
     newlines
 }
 
+/// Counts the empty lines after `offset`, ignoring any trailing trivia: end-of-line comments,
+/// own-line comments, and any intermediary newlines.
+pub fn lines_after_ignoring_trivia(offset: TextSize, code: &str) -> u32 {
+    let mut newlines = 0u32;
+    for token in SimpleTokenizer::starts_at(offset, code) {
+        match token.kind() {
+            SimpleTokenKind::Newline => {
+                newlines += 1;
+            }
+            SimpleTokenKind::Whitespace => {}
+            // If we see a comment, reset the newlines counter.
+            SimpleTokenKind::Comment => {
+                newlines = 0;
+            }
+            // As soon as we see a non-trivia token, we're done.
+            _ => {
+                break;
+            }
+        }
+    }
+    newlines
+}
+
 /// Counts the empty lines after `offset`, ignoring any trailing trivia on the same line as
 /// `offset`.
 #[allow(clippy::cast_possible_truncation)]
-pub fn lines_after_ignoring_trivia(offset: TextSize, code: &str) -> u32 {
+pub fn lines_after_ignoring_end_of_line_trivia(offset: TextSize, code: &str) -> u32 {
     // SAFETY: We don't support files greater than 4GB, so casting to u32 is safe.
     SimpleTokenizer::starts_at(offset, code)
         .skip_while(|token| token.kind != SimpleTokenKind::Newline && token.kind.is_trivia())