From 514b8880118138f48164fd83de99b72a2bb8ae64 Mon Sep 17 00:00:00 2001 From: harupy Date: Fri, 28 Jul 2023 23:02:37 +0900 Subject: [PATCH 1/7] Preserve backslash in raw string --- .../src/expression/string.rs | 16 ++++++++++++---- ...tibility@miscellaneous__string_quotes.py.snap | 9 ++++----- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 8395122be2235..31dba581f88b4 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -207,7 +207,11 @@ impl Format> for FormatStringPart { write!(f, [prefix, preferred_quotes])?; - let (normalized, contains_newlines) = normalize_string(raw_content, preferred_quotes); + let (normalized, contains_newlines) = normalize_string( + raw_content, + preferred_quotes, + matches!(prefix, StringPrefix::RAW | StringPrefix::RAW_UPPER), + ); match normalized { Cow::Borrowed(_) => { @@ -223,7 +227,7 @@ impl Format> for FormatStringPart { } bitflags! { - #[derive(Copy, Clone, Debug)] + #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub(super) struct StringPrefix: u8 { const UNICODE = 0b0000_0001; /// `r"test"` @@ -434,7 +438,11 @@ impl Format> for StringQuotes { /// with the provided `style`. /// /// Returns the normalized string and whether it contains new lines. -fn normalize_string(input: &str, quotes: StringQuotes) -> (Cow, ContainsNewlines) { +fn normalize_string( + input: &str, + quotes: StringQuotes, + is_raw: bool, +) -> (Cow, ContainsNewlines) { // The normalized string if `input` is not yet normalized. // `output` must remain empty if `input` is already normalized. let mut output = String::new(); @@ -468,7 +476,7 @@ fn normalize_string(input: &str, quotes: StringQuotes) -> (Cow, ContainsNew } else if c == '\n' { newlines = ContainsNewlines::Yes; } else if !quotes.triple { - if c == '\\' { + if !is_raw && c == '\\' { if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) { #[allow(clippy::if_same_then_else)] if next == opposite_quote { diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@miscellaneous__string_quotes.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@miscellaneous__string_quotes.py.snap index f2d2835cccdfd..7eb9f43769a44 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@miscellaneous__string_quotes.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@miscellaneous__string_quotes.py.snap @@ -82,13 +82,12 @@ f"\"{a}\"{'hello' * b}\"{c}\"" +f"NOT_YET_IMPLEMENTED_ExprJoinedStr" +f"NOT_YET_IMPLEMENTED_ExprJoinedStr" r"raw string ftw" --r"Date d\'expiration:(.*)" -+r"Date d'expiration:(.*)" + r"Date d\'expiration:(.*)" r'Tricky "quote' -r"Not-so-tricky \"quote" -rf"{yay}" -"\nThe \"quick\"\nbrown fox\njumps over\nthe 'lazy' dog.\n" -+r'Not-so-tricky "quote' ++r'Not-so-tricky \"quote' +f"NOT_YET_IMPLEMENTED_ExprJoinedStr" +"\n\ +The \"quick\"\n\ @@ -147,9 +146,9 @@ f"NOT_YET_IMPLEMENTED_ExprJoinedStr" f"NOT_YET_IMPLEMENTED_ExprJoinedStr" f"NOT_YET_IMPLEMENTED_ExprJoinedStr" r"raw string ftw" -r"Date d'expiration:(.*)" +r"Date d\'expiration:(.*)" r'Tricky "quote' -r'Not-so-tricky "quote' +r'Not-so-tricky \"quote' f"NOT_YET_IMPLEMENTED_ExprJoinedStr" "\n\ The \"quick\"\n\ From e504f180c7d74656325d938a39735fb6ac3170d6 Mon Sep 17 00:00:00 2001 From: harupy Date: Sat, 29 Jul 2023 01:40:42 +0900 Subject: [PATCH 2/7] Add is_raw_string --- crates/ruff_python_formatter/src/expression/string.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 31dba581f88b4..b1c3bf901a299 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -207,11 +207,8 @@ impl Format> for FormatStringPart { write!(f, [prefix, preferred_quotes])?; - let (normalized, contains_newlines) = normalize_string( - raw_content, - preferred_quotes, - matches!(prefix, StringPrefix::RAW | StringPrefix::RAW_UPPER), - ); + let (normalized, contains_newlines) = + normalize_string(raw_content, preferred_quotes, prefix.is_raw_string()); match normalized { Cow::Borrowed(_) => { @@ -268,6 +265,10 @@ impl StringPrefix { pub(super) const fn text_len(self) -> TextSize { TextSize::new(self.bits().count_ones()) } + + pub(super) const fn is_raw_string(self) -> bool { + matches!(self, StringPrefix::RAW | StringPrefix::RAW_UPPER) + } } impl Format> for StringPrefix { From 640883066b8793792d6452addb5c177ec93b3985 Mon Sep 17 00:00:00 2001 From: harupy Date: Sat, 29 Jul 2023 11:27:38 +0900 Subject: [PATCH 3/7] Move is_raw --- crates/ruff_python_formatter/src/expression/string.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index b1c3bf901a299..56e8db8e36d34 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -476,8 +476,8 @@ fn normalize_string( newlines = ContainsNewlines::Yes; } else if c == '\n' { newlines = ContainsNewlines::Yes; - } else if !quotes.triple { - if !is_raw && c == '\\' { + } else if !quotes.triple && !is_raw { + if c == '\\' { if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) { #[allow(clippy::if_same_then_else)] if next == opposite_quote { From c1577fd741c2e425937249307e6ac7beac676cbc Mon Sep 17 00:00:00 2001 From: harupy Date: Sun, 30 Jul 2023 13:41:59 +0900 Subject: [PATCH 4/7] Fix quote detection for raw string --- .../src/expression/string.rs | 56 ++++++++++++++++++- ...ility@miscellaneous__string_quotes.py.snap | 5 +- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 56e8db8e36d34..9b68a86e8ae54 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -203,12 +203,17 @@ impl Format> for FormatStringPart { let raw_content_range = relative_raw_content_range + self.part_range.start(); let raw_content = &string_content[relative_raw_content_range]; - let preferred_quotes = preferred_quotes(raw_content, quotes, f.options().quote_style()); + let is_raw_string = prefix.is_raw_string(); + let preferred_quotes = if is_raw_string { + preferred_quotes_raw(raw_content, quotes, f.options().quote_style()) + } else { + preferred_quotes(raw_content, quotes, f.options().quote_style()) + }; write!(f, [prefix, preferred_quotes])?; let (normalized, contains_newlines) = - normalize_string(raw_content, preferred_quotes, prefix.is_raw_string()); + normalize_string(raw_content, preferred_quotes, is_raw_string); match normalized { Cow::Borrowed(_) => { @@ -295,6 +300,53 @@ impl Format> for StringPrefix { } } +/// Detects the preferred quotes for raw string `input`. +fn preferred_quotes_raw( + input: &str, + quotes: StringQuotes, + configured_style: QuoteStyle, +) -> StringQuotes { + let configured_quote_char = configured_style.as_char(); + let mut chars = input.chars().peekable(); + let contains_unescaped_configured_quotes = loop { + match chars.next() { + Some('\\') => { + // Ignore escaped characters + chars.next(); + } + // `"` or `'` + Some(c) if c == configured_quote_char => { + if !quotes.triple { + break true; + } + + if chars.peek() == Some(&configured_quote_char) { + // `""` or `''` + chars.next(); + + if chars.peek() == Some(&configured_quote_char) { + // `"""` or `'''` + break true; + } + } + } + Some(_) => continue, + None => break false, + } + }; + + StringQuotes { + triple: quotes.triple, + // If unescaped configured quotes are found, we can't change the configured quote style. + // For example, `r' " '` can't be changed to `r" " "`. + style: if contains_unescaped_configured_quotes { + quotes.style + } else { + configured_style + }, + } +} + /// Detects the preferred quotes for `input`. /// * single quoted strings: The preferred quote style is the one that requires less escape sequences. /// * triple quoted strings: Use double quotes except the string contains a sequence of `"""`. diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@miscellaneous__string_quotes.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@miscellaneous__string_quotes.py.snap index 7eb9f43769a44..9473281ef2a44 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@miscellaneous__string_quotes.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@miscellaneous__string_quotes.py.snap @@ -84,10 +84,9 @@ f"\"{a}\"{'hello' * b}\"{c}\"" r"raw string ftw" r"Date d\'expiration:(.*)" r'Tricky "quote' --r"Not-so-tricky \"quote" + r"Not-so-tricky \"quote" -rf"{yay}" -"\nThe \"quick\"\nbrown fox\njumps over\nthe 'lazy' dog.\n" -+r'Not-so-tricky \"quote' +f"NOT_YET_IMPLEMENTED_ExprJoinedStr" +"\n\ +The \"quick\"\n\ @@ -148,7 +147,7 @@ f"NOT_YET_IMPLEMENTED_ExprJoinedStr" r"raw string ftw" r"Date d\'expiration:(.*)" r'Tricky "quote' -r'Not-so-tricky \"quote' +r"Not-so-tricky \"quote" f"NOT_YET_IMPLEMENTED_ExprJoinedStr" "\n\ The \"quick\"\n\ From c4406dd5bc511e7523e58c754890f8259d127c36 Mon Sep 17 00:00:00 2001 From: harupy Date: Sun, 30 Jul 2023 13:42:37 +0900 Subject: [PATCH 5/7] Fix comment --- crates/ruff_python_formatter/src/expression/string.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 9b68a86e8ae54..6ceef87a1b212 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -337,7 +337,7 @@ fn preferred_quotes_raw( StringQuotes { triple: quotes.triple, - // If unescaped configured quotes are found, we can't change the configured quote style. + // If unescaped configured quotes are found, we can't change the quote style. // For example, `r' " '` can't be changed to `r" " "`. style: if contains_unescaped_configured_quotes { quotes.style From 5e051e83d35a7ae41bd4c3fd232c495869282310 Mon Sep 17 00:00:00 2001 From: harupy Date: Sun, 30 Jul 2023 14:02:54 +0900 Subject: [PATCH 6/7] Fix comment --- crates/ruff_python_formatter/src/expression/string.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 6ceef87a1b212..317a0b6a6ca5c 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -337,7 +337,7 @@ fn preferred_quotes_raw( StringQuotes { triple: quotes.triple, - // If unescaped configured quotes are found, we can't change the quote style. + // If unescaped configured quotes are found, the quote style can't be changed. // For example, `r' " '` can't be changed to `r" " "`. style: if contains_unescaped_configured_quotes { quotes.style From 9fab16b427124dfe120d6486fc36dd9f71c93087 Mon Sep 17 00:00:00 2001 From: harupy Date: Mon, 31 Jul 2023 21:40:01 +0900 Subject: [PATCH 7/7] Update doc --- crates/ruff_python_formatter/src/expression/string.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 317a0b6a6ca5c..caa1a0c60290b 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -301,6 +301,9 @@ impl Format> for StringPrefix { } /// Detects the preferred quotes for raw string `input`. +/// The configured quote style is preferred unless `input` contains unescaped quotes of the +/// configured style. For example, `r"foo"` is preferred over `r'foo'` if the configured +/// quote style is double quotes. fn preferred_quotes_raw( input: &str, quotes: StringQuotes, @@ -337,8 +340,6 @@ fn preferred_quotes_raw( StringQuotes { triple: quotes.triple, - // If unescaped configured quotes are found, the quote style can't be changed. - // For example, `r' " '` can't be changed to `r" " "`. style: if contains_unescaped_configured_quotes { quotes.style } else {