astral-sh · MichaReiser · Oct 7, 2024 · Oct 4, 2024 · Oct 4, 2024 · Oct 4, 2024
diff --git a/crates/ruff_linter/resources/test/fixtures/refurb/FURB188.py b/crates/ruff_linter/resources/test/fixtures/refurb/FURB188.py
@@ -169,4 +169,32 @@ def ignore_step():
     text = "!x!y!z"
     if text.startswith("!"):
         text = text[1::2]
-    print(text)
+    print(text)
+
+def handle_unicode():
+    # should be skipped!
+    text = "řetězec"
+    if text.startswith("ř"): 
+        text = text[2:]
+
+    # should be linted
+    # with fix `text = text.removeprefix("ř")`
+    text = "řetězec"
+    if text.startswith("ř"): 
+        text = text[1:]
+
+
+def handle_surrogates():
+    # should be linted
+    text = "\ud800\udc00heythere"
+    if text.startswith("\ud800\udc00"):
+        text = text[2:]
+    text = "\U00010000heythere"
+    if text.startswith("\U00010000"):
+        text = text[1:]
+
+    # should not be linted
+    text = "\ud800\udc00heythere"
+    if text.startswith("\ud800\udc00"):
+        text = text[1:]
+
diff --git a/crates/ruff_linter/src/rules/refurb/rules/slice_to_remove_prefix_or_suffix.rs b/crates/ruff_linter/src/rules/refurb/rules/slice_to_remove_prefix_or_suffix.rs
@@ -4,7 +4,7 @@ use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast as ast;
 use ruff_python_semantic::SemanticModel;
 use ruff_source_file::Locator;
-use ruff_text_size::{Ranged, TextLen};
+use ruff_text_size::Ranged;
 
 /// ## What it does
 /// Checks for the removal of a prefix or suffix from a string by assigning
@@ -334,8 +334,10 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) -
             }),
         ) => num
             .as_int()
-            .and_then(ast::Int::as_u32) // Only support prefix removal for size at most `u32::MAX`
-            .is_some_and(|x| x == string_val.to_str().text_len().to_u32()),
+            // Only support prefix removal for size at most `u32::MAX`
+            .and_then(ast::Int::as_u32)
+            .and_then(|x| usize::try_from(x).ok())
-            .and_then(ast::Int::as_u32)
-            .and_then(|x| usize::try_from(x).ok())
+            .and_then(ast::Int::as_u64)
+            .and_then(|x| usize::try_from(x).ok())
-            .and_then(ast::Int::as_u32)
-            .and_then(|x| usize::try_from(x).ok())
+            .and_then(ast::Int::as_u64)
+            .and_then(|x| usize::try_from(x).ok())
+            .is_some_and(|x| x == string_val.to_str().chars().count()),
-            .is_some_and(|x| x == string_val.to_str().chars().count()),
+            .is_some_and(|x| x == string_val.chars().count()),
-            .is_some_and(|x| x == string_val.to_str().chars().count()),
+            .is_some_and(|x| x == string_val.chars().count()),
         (
             AffixKind::StartsWith,
             ast::Expr::Call(ast::ExprCall {
@@ -370,7 +372,8 @@ fn affix_matches_slice_bound(data: &RemoveAffixData, semantic: &SemanticModel) -
                 value
                     .as_int()
                     .and_then(ast::Int::as_u32)
-                    .is_some_and(|x| x == string_val.to_str().text_len().to_u32())
+                    .and_then(|x| usize::try_from(x).ok())
+                    .is_some_and(|x| x == string_val.to_str().chars().count())
-                    .is_some_and(|x| x == string_val.to_str().chars().count())
+                    .is_some_and(|x| x == string_val.chars().count())
-                    .is_some_and(|x| x == string_val.to_str().chars().count())
+                    .is_some_and(|x| x == string_val.chars().count())
             },
         ),
         (

diff --git a/...ter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB188_FURB188.py.snap b/...ter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB188_FURB188.py.snap
@@ -250,4 +250,74 @@ FURB188.py:162:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing
     162 |+    text = text.removeprefix("!")
 164 163 |     print(text)
 165 164 | 
-166 165 |
+166 165 | 
+
+FURB188.py:183:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
+    |
+181 |       # with fix `text = text.removeprefix("ř")`
+182 |       text = "řetězec"
+183 |       if text.startswith("ř"): 
+    |  _____^
+184 | |         text = text[1:]
+    | |_______________________^ FURB188
+    |
+    = help: Use removeprefix instead of assignment conditional upon startswith.
+
+ℹ Safe fix
+180 180 |     # should be linted
+181 181 |     # with fix `text = text.removeprefix("ř")`
+182 182 |     text = "řetězec"
+183     |-    if text.startswith("ř"): 
+184     |-        text = text[1:]
+    183 |+    text = text.removeprefix("ř")
+185 184 | 
+186 185 | 
+187 186 | def handle_surrogates():
+
+FURB188.py:190:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
+    |
+188 |       # should be linted
+189 |       text = "\ud800\udc00heythere"
+190 |       if text.startswith("\ud800\udc00"):
+    |  _____^
+191 | |         text = text[2:]
+    | |_______________________^ FURB188
+192 |       text = "\U00010000heythere"
+193 |       if text.startswith("\U00010000"):
+    |
+    = help: Use removeprefix instead of assignment conditional upon startswith.
+
+ℹ Safe fix
+187 187 | def handle_surrogates():
+188 188 |     # should be linted
+189 189 |     text = "\ud800\udc00heythere"
+190     |-    if text.startswith("\ud800\udc00"):
+191     |-        text = text[2:]
+    190 |+    text = text.removeprefix("\ud800\udc00")
+192 191 |     text = "\U00010000heythere"
+193 192 |     if text.startswith("\U00010000"):
+194 193 |         text = text[1:]
+
+FURB188.py:193:5: FURB188 [*] Prefer `removeprefix` over conditionally replacing with slice.
+    |
+191 |           text = text[2:]
+192 |       text = "\U00010000heythere"
+193 |       if text.startswith("\U00010000"):
+    |  _____^
+194 | |         text = text[1:]
+    | |_______________________^ FURB188
+195 |       
+196 |       # should not be linted
+    |
+    = help: Use removeprefix instead of assignment conditional upon startswith.
+
+ℹ Safe fix
+190 190 |     if text.startswith("\ud800\udc00"):
+191 191 |         text = text[2:]
+192 192 |     text = "\U00010000heythere"
+193     |-    if text.startswith("\U00010000"):
+194     |-        text = text[1:]
+    193 |+    text = text.removeprefix("\U00010000")
+195 194 |     
+196 195 |     # should not be linted
+197 196 |     text = "\ud800\udc00heythere"