Use reserved width to include line suffix measurement (#6901)

Co-authored-by: Micha Reiser <micha@reiser.io>
astral-sh · Aug 30, 2023 · a3f4d77 · a3f4d77
1 parent edfd888
commit a3f4d77
Show file tree

Hide file tree

Showing 16 changed files with 504 additions and 80 deletions.
diff --git a/...ython_formatter/resources/test/fixtures/black/simple_cases/comments_non_breaking_space.py b/...ython_formatter/resources/test/fixtures/black/simple_cases/comments_non_breaking_space.py
@@ -14,6 +14,6 @@ def function(a:int=42):
        a
        b
     """
-    #    There's a NBSP + 3 spaces before
+    #      There's a NBSP + 3 spaces before
     #    And 4 spaces on the next line
     pass
diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/tuple.py
@@ -66,3 +66,6 @@
 h1 = ((((1, 2))))
 h2 = ((((1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurpqurpqurpqurpqurüqurqpuriq"))))
 h3 = 1, "qweiurpoiqwurepqiurpqirpuqoiwrupqoirupqoirupqoiurpqiorupwqiourpqurpqurpqurpqurpqurpqurüqurqpuriq"
+
+i1 = ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",)  # This should break
+
diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/trailing_comments.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/trailing_comments.py
@@ -0,0 +1,6 @@
+# As of adding this fixture Black adds a space before the non-breaking space if part of a type pragma.
+# https://github.com/psf/black/blob/b4dca26c7d93f930bbd5a7b552807370b60d4298/src/black/comments.py#L122-L129
+i2 = ""  #         type: Add space before leading NBSP followed by spaces
+i3 = ""  #type: A space is added
+i4 = ""  #  type: Add space before leading NBSP followed by a space
+i5 = ""  # type: Add space before leading NBSP
diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs
@@ -1,6 +1,9 @@
-use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
+use std::borrow::Cow;
+use unicode_width::UnicodeWidthChar;
 
-use ruff_formatter::{format_args, write, FormatError, SourceCode};
+use ruff_text_size::{Ranged, TextLen, TextRange};
+
+use ruff_formatter::{format_args, write, FormatError, FormatOptions, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before};
 
@@ -151,19 +154,22 @@ impl Format<PyFormatContext<'_>> for FormatTrailingComments<'_> {
                                 empty_lines(lines_before_comment),
                                 format_comment(trailing)
                             ],
+                            // Reserving width isn't necessary because we don't split
+                            // comments and the empty lines expand any enclosing group.
                             0
                         ),
                         expand_parent()
                     ]
                 )?;
             } else {
-                write!(
-                    f,
-                    [
-                        line_suffix(&format_args![space(), space(), format_comment(trailing)], 0),
-                        expand_parent()
-                    ]
-                )?;
+                // A trailing comment at the end of a line has a reserved width to
+                // consider during line measurement.
+                // ```python
+                // tup = (
+                //     "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+                // )  # Some comment
+                // ```
+                trailing_end_of_line_comment(trailing).fmt(f)?;
             }
 
             trailing.mark_formatted();
@@ -262,13 +268,7 @@ impl Format<PyFormatContext<'_>> for FormatDanglingOpenParenthesisComments<'_> {
                 "Expected dangling comment to be at the end of the line"
             );
 
-            write!(
-                f,
-                [
-                    line_suffix(&format_args!(space(), space(), format_comment(comment)), 0),
-                    expand_parent()
-                ]
-            )?;
+            trailing_end_of_line_comment(comment).fmt(f)?;
             comment.mark_formatted();
         }
 
@@ -291,50 +291,11 @@ pub(crate) struct FormatComment<'a> {
 impl Format<PyFormatContext<'_>> for FormatComment<'_> {
     fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
         let slice = self.comment.slice();
-        let comment_text = slice.text(SourceCode::new(f.context().source()));
-
-        let trimmed = comment_text.trim_end();
-        let trailing_whitespace_len = comment_text.text_len() - trimmed.text_len();
-
-        let Some(content) = trimmed.strip_prefix('#') else {
-            return Err(FormatError::syntax_error(
-                "Didn't find expected comment token `#`",
-            ));
-        };
-
-        // Fast path for correctly formatted comments:
-        // * Start with a `#` and are followed by a space
-        // * Have no trailing whitespace.
-        if trailing_whitespace_len == TextSize::new(0) && content.starts_with(' ') {
-            return source_text_slice(slice.range(), ContainsNewlines::No).fmt(f);
-        }
-
-        write!(f, [source_position(slice.start()), text("#")])?;
-
-        // Starts with a non breaking space
-        let start_offset =
-            if content.starts_with('\u{A0}') && !content.trim_start().starts_with("type:") {
-                // Replace non-breaking space with a space (if not followed by a normal space)
-                "#\u{A0}".text_len()
-            } else {
-                '#'.text_len()
-            };
-
-        // Add a space between the `#` and the text if the source contains none.
-        if !content.is_empty() && !content.starts_with([' ', '!', ':', '#', '\'']) {
-            write!(f, [space()])?;
-        }
+        let source = SourceCode::new(f.context().source());
 
-        let start = slice.start() + start_offset;
-        let end = slice.end() - trailing_whitespace_len;
+        let normalized_comment = normalize_comment(self.comment, source)?;
 
-        write!(
-            f,
-            [
-                source_text_slice(TextRange::new(start, end), ContainsNewlines::No),
-                source_position(slice.end())
-            ]
-        )
+        format_normalized_comment(normalized_comment, slice.range()).fmt(f)
     }
 }
 
@@ -372,3 +333,145 @@ impl Format<PyFormatContext<'_>> for FormatEmptyLines {
         }
     }
 }
+
+/// Creates a formattable element that writes a trailing end-of-line `comment` as a
+/// line suffix with a reserved width.
+///
+/// * Normalizes the `SourceComment` text the way Black does (see `normalize_comment`).
+/// * Reserves the width of the final, normalized content plus its two leading spaces.
+/// * Emits `expand_parent()` after the line suffix.
+pub(crate) const fn trailing_end_of_line_comment(
+    comment: &SourceComment,
+) -> FormatTrailingEndOfLineComment {
+    FormatTrailingEndOfLineComment { comment }
+}
+
+pub(crate) struct FormatTrailingEndOfLineComment<'a> {
+    comment: &'a SourceComment, // comment that is normalized and written as a line suffix
+}
+
+impl Format<PyFormatContext<'_>> for FormatTrailingEndOfLineComment<'_> {
+    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
+        let slice = self.comment.slice();
+        let source = SourceCode::new(f.context().source());
+
+        let normalized_comment = normalize_comment(self.comment, source)?;
+
+        // Start at 2 to account for the two spaces written in front of the comment below.
+        let mut reserved_width = 2;
+
+        // The cast converts a single character's width (presumably <= 2), so it cannot truncate.
+        #[allow(clippy::cast_possible_truncation)]
+        for c in normalized_comment.chars() {
+            reserved_width += match c {
+                '\t' => f.options().tab_width().value(), // a tab counts as the configured tab width
+                c => c.width().unwrap_or(0) as u32, // characters without a width count as 0
+            }
+        }
+
+        write!(
+            f,
+            [
+                line_suffix(
+                    &format_args![
+                        space(),
+                        space(),
+                        format_normalized_comment(normalized_comment, slice.range())
+                    ],
+                    reserved_width
+                ),
+                expand_parent()
+            ]
+        )
+    }
+}
+
+/// Wraps already-normalized comment text together with the source `range` of the
+/// original comment so that it can be written by `FormatNormalizedComment`.
+///
+/// * `Cow::Borrowed` text is written as a source text slice that starts at
+///   `range.start()` and spans the length of the borrowed text.
+/// * `Cow::Owned` text is written as a dynamic text element positioned at
+///   `range.start()`, followed by a source position for `range.end()`.
+pub(crate) const fn format_normalized_comment(
+    comment: Cow<'_, str>,
+    range: TextRange,
+) -> FormatNormalizedComment<'_> {
+    FormatNormalizedComment { comment, range }
+}
+
+pub(crate) struct FormatNormalizedComment<'a> {
+    comment: Cow<'a, str>, // normalized comment text; the variant selects how it is written
+    range: TextRange, // range of the original comment, used for slicing and source positions
+}
+
+impl Format<PyFormatContext<'_>> for FormatNormalizedComment<'_> {
+    fn fmt(&self, f: &mut Formatter<PyFormatContext>) -> FormatResult<()> {
+        match self.comment {
+            Cow::Borrowed(borrowed) => source_text_slice(
+                TextRange::at(self.range.start(), borrowed.text_len()), // slice from the range start
+                ContainsNewlines::No,
+            )
+            .fmt(f),
+
+            Cow::Owned(ref owned) => {
+                write!(
+                    f,
+                    [
+                        dynamic_text(owned, Some(self.range.start())), // owned text at the range start
+                        source_position(self.range.end()) // then the end of the original range
+                    ]
+                )
+            }
+        }
+    }
+}
+
+/// Normalizes the text of `comment`, borrowing from `source` whenever possible.
+///
+/// * Trailing whitespace is trimmed; an error is returned if the text lacks a leading `#`.
+/// * Text whose `#` is followed by a space, `!`, `:`, `#` or `'` is returned borrowed.
+/// * Any other text is rebuilt as an owned string that starts with `# `.
+/// * Leading non-breaking spaces before a `type:` pragma are replaced by `# \u{A0}`.
+fn normalize_comment<'a>(
+    comment: &'a SourceComment,
+    source: SourceCode<'a>,
+) -> FormatResult<Cow<'a, str>> {
+    let slice = comment.slice();
+    let comment_text = slice.text(source);
+
+    let trimmed = comment_text.trim_end();
+
+    let Some(content) = trimmed.strip_prefix('#') else {
+        return Err(FormatError::syntax_error(
+            "Didn't find expected comment token `#`",
+        ));
+    };
+
+    if content.is_empty() {
+        return Ok(Cow::Borrowed("#"));
+    }
+
+    // Fast path: the `#` is already followed by a space or by one of `!`, `:`, `#`, `'`.
+    // Nothing has to be inserted, so return the text (minus any trailing whitespace)
+    // borrowed instead of allocating.
+    if content.starts_with([' ', '!', ':', '#', '\'']) {
+        return Ok(Cow::Borrowed(trimmed));
+    }
+
+    if content.starts_with('\u{A0}') {
+        let trimmed = content.trim_start_matches('\u{A0}'); // strips every leading NBSP
+
+        // Black adds a space before the non-breaking space if part of a type pragma.
+        if trimmed.trim_start().starts_with("type:") {
+            return Ok(Cow::Owned(std::format!("# \u{A0}{trimmed}")));
+        }
+
+        // Black replaces the non-breaking space with a space if followed by a space.
+        if trimmed.starts_with(' ') {
+            return Ok(Cow::Owned(std::format!("# {trimmed}")));
+        }
+    }
+    // Otherwise drop any leading whitespace and insert a single space after the `#`.
+    Ok(Cow::Owned(std::format!("# {}", content.trim_start())))
+}
diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs
@@ -247,10 +247,13 @@ impl Format<PyFormatContext<'_>> for MaybeParenthesizeExpression<'_> {
                     if format_expression.inspect(f)?.will_break() {
                         // The group here is necessary because `format_expression` may contain IR elements
                         // that refer to the group id
-                        group(&format_expression)
-                            .with_group_id(Some(group_id))
-                            .should_expand(true)
-                            .fmt(f)
+                        group(&format_args![
+                            text("("),
+                            soft_block_indent(&format_expression),
+                            text(")")
+                        ])
+                        .with_group_id(Some(group_id))
+                        .fmt(f)
                     } else {
                         // Only add parentheses if it makes the expression fit on the line.
                         // Using the flat version as the most expanded version gives a left-to-right splitting behavior

diff --git a/...ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments6.py.snap b/...ruff_python_formatter/tests/snapshots/black_compatibility@simple_cases__comments6.py.snap
@@ -156,7 +156,7 @@ aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*ite
      )
  
  
-@@ -108,11 +112,18 @@
+@@ -108,11 +112,20 @@
      "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
  )
  
@@ -176,7 +176,10 @@ aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*ite
 +    ],  # type: ignore
  )
  
- aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*items)))  # type: ignore[arg-type]
+-aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*items)))  # type: ignore[arg-type]
++aaaaaaaaaaaaa, bbbbbbbbb = map(
++    list, map(itertools.chain.from_iterable, zip(*items))
++)  # type: ignore[arg-type]
 ```
 
 ## Ruff Output
@@ -310,7 +313,9 @@ call_to_some_function_asdf(
     ],  # type: ignore
 )
 
-aaaaaaaaaaaaa, bbbbbbbbb = map(list, map(itertools.chain.from_iterable, zip(*items)))  # type: ignore[arg-type]
+aaaaaaaaaaaaa, bbbbbbbbb = map(
+    list, map(itertools.chain.from_iterable, zip(*items))
+)  # type: ignore[arg-type]
 ```
 
 ## Black Output

diff --git a/...uff_python_formatter/tests/snapshots/black_compatibility@simple_cases__expression.py.snap b/...uff_python_formatter/tests/snapshots/black_compatibility@simple_cases__expression.py.snap
@@ -300,7 +300,18 @@ last_call()
  )  # note: no trailing comma pre-3.6
  call(*gidgets[:2])
  call(a, *gidgets[:2])
-@@ -328,13 +329,18 @@
+@@ -142,7 +143,9 @@
+ xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod(  # type: ignore
+     sync(async_xxxx_xxx_xxxx_xxxxx_xxxx_xxx.__func__)
+ )
+-xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod(  # type: ignore
++xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[
++    ..., List[SomeClass]
++] = classmethod(  # type: ignore
+     sync(async_xxxx_xxx_xxxx_xxxxx_xxxx_xxx.__func__)
+ )
+ xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod(
+@@ -328,13 +331,18 @@
  ):
      return True
  if (
@@ -322,7 +333,7 @@ last_call()
      ^ aaaaaaaa.i << aaaaaaaa.k >> aaaaaaaa.l**aaaaaaaa.m // aaaaaaaa.n
  ):
      return True
-@@ -342,7 +348,8 @@
+@@ -342,7 +350,8 @@
      ~aaaaaaaaaaaaaaaa.a
      + aaaaaaaaaaaaaaaa.b
      - aaaaaaaaaaaaaaaa.c * aaaaaaaaaaaaaaaa.d @ aaaaaaaaaaaaaaaa.e
@@ -482,7 +493,9 @@ very_long_variable_name_filters: t.List[
 xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod(  # type: ignore
     sync(async_xxxx_xxx_xxxx_xxxxx_xxxx_xxx.__func__)
 )
-xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod(  # type: ignore
+xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[
+    ..., List[SomeClass]
+] = classmethod(  # type: ignore
     sync(async_xxxx_xxx_xxxx_xxxxx_xxxx_xxx.__func__)
 )
 xxxx_xxx_xxxx_xxxxx_xxxx_xxx: Callable[..., List[SomeClass]] = classmethod(