Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF055_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import re

b_src = b"abc"

# Should be replaced with `b_src.replace(rb"x", b"y")`
re.sub(rb"x", b"y", b_src)

# Should be replaced with `b_src.startswith(rb"abc")`
if re.match(rb"abc", b_src):
pass

# Should be replaced with `rb"x" in b_src`
if re.search(rb"x", b_src):
pass

# Should be replaced with `b_src.split(rb"abc")`
re.split(rb"abc", b_src)

# Patterns containing metacharacters should NOT be replaced
re.sub(rb"ab[c]", b"", b_src)
re.match(rb"ab[c]", b_src)
re.search(rb"ab[c]", b_src)
re.fullmatch(rb"ab[c]", b_src)
re.split(rb"ab[c]", b_src)
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ mod tests {
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_0.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_1.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_2.py"))]
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_3.py"))]
#[test_case(Rule::PytestRaisesAmbiguousPattern, Path::new("RUF043.py"))]
#[test_case(Rule::IndentedFormFeed, Path::new("RUF054.py"))]
#[test_case(Rule::ImplicitClassVarInDataclass, Path::new("RUF045.py"))]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use itertools::Itertools;
use ruff_macros::{ViolationMetadata, derive_message_formats};
use ruff_python_ast::{
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, ExprStringLiteral,
ExprUnaryOp, Identifier, UnaryOp,
Arguments, CmpOp, Expr, ExprAttribute, ExprBytesLiteral, ExprCall, ExprCompare, ExprContext,
ExprStringLiteral, ExprUnaryOp, Identifier, UnaryOp,
};
use ruff_python_semantic::analyze::typing::find_binding_value;
use ruff_python_semantic::{Modules, SemanticModel};
Expand Down Expand Up @@ -72,6 +72,9 @@ impl Violation for UnnecessaryRegularExpression {
}
}

const METACHARACTERS: [char; 12] = ['.', '^', '$', '*', '+', '?', '{', '[', '\\', '|', '(', ')'];
const ESCAPABLE_SINGLE_CHARACTERS: &str = "abfnrtv";

/// RUF055
pub(crate) fn unnecessary_regular_expression(checker: &Checker, call: &ExprCall) {
// adapted from unraw_re_pattern
Expand All @@ -96,16 +99,19 @@ pub(crate) fn unnecessary_regular_expression(checker: &Checker, call: &ExprCall)
};

// For now, restrict this rule to string literals and variables that can be resolved to literals
let Some(string_lit) = resolve_string_literal(re_func.pattern, semantic) else {
let Some(literal) = resolve_literal(re_func.pattern, semantic) else {
return;
};

// For now, reject any regex metacharacters. Compare to the complete list
// from https://docs.python.org/3/howto/regex.html#matching-characters
let has_metacharacters = string_lit
.value
.to_str()
.contains(['.', '^', '$', '*', '+', '?', '{', '[', '\\', '|', '(', ')']);
let has_metacharacters = match &literal {
Literal::Str(str_lit) => str_lit.value.to_str().contains(METACHARACTERS),
Literal::Bytes(bytes_lit) => bytes_lit
.value
.iter()
.any(|part| part.iter().any(|&b| METACHARACTERS.contains(&(b as char)))),
};

if has_metacharacters {
return;
Expand Down Expand Up @@ -186,28 +192,48 @@ impl<'a> ReFunc<'a> {
// version
("sub", 3) => {
let repl = call.arguments.find_argument_value("repl", 1)?;
let lit = resolve_string_literal(repl, semantic)?;
let lit = resolve_literal(repl, semantic)?;
let mut fixable = true;
for (c, next) in lit.value.chars().tuple_windows() {
// `\0` (or any other ASCII digit) and `\g` have special meaning in `repl` strings.
// Meanwhile, nearly all other escapes of ASCII letters in a `repl` string causes
// `re.PatternError` to be raised at runtime.
//
// If we see that the escaped character is an alphanumeric ASCII character,
// we should only emit a diagnostic suggesting to replace the `re.sub()` call with
// `str.replace`if we can detect that the escaped character is one that is both
// valid in a `repl` string *and* does not have any special meaning in a REPL string.
//
// It's out of scope for this rule to change invalid `re.sub()` calls into something
// that would not raise an exception at runtime. They should be left as-is.
if c == '\\' && next.is_ascii_alphanumeric() {
if "abfnrtv".contains(next) {
fixable = false;
} else {
return None;

match lit {
Literal::Str(lit_str) => {
// Perform escape analysis for replacement literals.
for (c, next) in lit_str.value.to_str().chars().tuple_windows() {
// `\\0` (or any other ASCII digit) and `\\g` have special meaning in `repl` strings.
// Meanwhile, nearly all other escapes of ASCII letters in a `repl` string causes
// `re.PatternError` to be raised at runtime.
//
// If we see that the escaped character is an alphanumeric ASCII character,
// we should only emit a diagnostic suggesting to replace the `re.sub()` call with
// `str.replace`if we can detect that the escaped character is one that is both
// valid in a `repl` string *and* does not have any special meaning in a REPL string.
//
// It's out of scope for this rule to change invalid `re.sub()` calls into something
// that would not raise an exception at runtime. They should be left as-is.
if c == '\\' && next.is_ascii_alphanumeric() {
if ESCAPABLE_SINGLE_CHARACTERS.contains(next) {
fixable = false;
} else {
return None;
}
}
}
}
Literal::Bytes(lit_bytes) => {
for part in &lit_bytes.value {
for (byte, next) in part.iter().copied().tuple_windows() {
if byte == b'\\' && (next as char).is_ascii_alphanumeric() {
if ESCAPABLE_SINGLE_CHARACTERS.contains(next as char) {
fixable = false;
} else {
return None;
}
}
}
}
}
}

Some(ReFunc {
kind: ReFuncKind::Sub {
repl: fixable.then_some(repl),
Expand Down Expand Up @@ -329,6 +355,43 @@ impl<'a> ReFunc<'a> {
}
}

/// A literal that can be either a string or a bytes literal.
enum Literal<'a> {
Str(&'a ExprStringLiteral),
Bytes(&'a ExprBytesLiteral),
}

/// Try to resolve `name` to either a string or bytes literal in `semantic`.
fn resolve_literal<'a>(name: &'a Expr, semantic: &'a SemanticModel) -> Option<Literal<'a>> {
if let Some(str_lit) = resolve_string_literal(name, semantic) {
return Some(Literal::Str(str_lit));
}
if let Some(bytes_lit) = resolve_bytes_literal(name, semantic) {
return Some(Literal::Bytes(bytes_lit));
}
None
}

/// Try to resolve `name` to an [`ExprBytesLiteral`] in `semantic`.
fn resolve_bytes_literal<'a>(
name: &'a Expr,
semantic: &'a SemanticModel,
) -> Option<&'a ExprBytesLiteral> {
if name.is_bytes_literal_expr() {
return name.as_bytes_literal_expr();
}

if let Some(name_expr) = name.as_name_expr() {
let binding = semantic.binding(semantic.only_binding(name_expr)?);
let value = find_binding_value(binding, semantic)?;
if value.is_bytes_literal_expr() {
return value.as_bytes_literal_expr();
}
}

None
}

/// Try to resolve `name` to an [`ExprStringLiteral`] in `semantic`.
fn resolve_string_literal<'a>(
name: &'a Expr,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
---
source: crates/ruff_linter/src/rules/ruff/mod.rs
---
RUF055_3.py:6:1: RUF055 [*] Plain string pattern passed to `re` function
|
5 | # Should be replaced with `b_src.replace(rb"x", b"y")`
6 | re.sub(rb"x", b"y", b_src)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF055
7 |
8 | # Should be replaced with `b_src.startswith(rb"abc")`
|
= help: Replace with `b_src.replace(rb"x", b"y")`

ℹ Safe fix
3 3 | b_src = b"abc"
4 4 |
5 5 | # Should be replaced with `b_src.replace(rb"x", b"y")`
6 |-re.sub(rb"x", b"y", b_src)
6 |+b_src.replace(rb"x", b"y")
7 7 |
8 8 | # Should be replaced with `b_src.startswith(rb"abc")`
9 9 | if re.match(rb"abc", b_src):

RUF055_3.py:9:4: RUF055 [*] Plain string pattern passed to `re` function
|
8 | # Should be replaced with `b_src.startswith(rb"abc")`
9 | if re.match(rb"abc", b_src):
| ^^^^^^^^^^^^^^^^^^^^^^^^ RUF055
10 | pass
|
= help: Replace with `b_src.startswith(rb"abc")`

ℹ Safe fix
6 6 | re.sub(rb"x", b"y", b_src)
7 7 |
8 8 | # Should be replaced with `b_src.startswith(rb"abc")`
9 |-if re.match(rb"abc", b_src):
9 |+if b_src.startswith(rb"abc"):
10 10 | pass
11 11 |
12 12 | # Should be replaced with `rb"x" in b_src`

RUF055_3.py:13:4: RUF055 [*] Plain string pattern passed to `re` function
|
12 | # Should be replaced with `rb"x" in b_src`
13 | if re.search(rb"x", b_src):
| ^^^^^^^^^^^^^^^^^^^^^^^ RUF055
14 | pass
|
= help: Replace with `rb"x" in b_src`

ℹ Safe fix
10 10 | pass
11 11 |
12 12 | # Should be replaced with `rb"x" in b_src`
13 |-if re.search(rb"x", b_src):
13 |+if rb"x" in b_src:
14 14 | pass
15 15 |
16 16 | # Should be replaced with `b_src.split(rb"abc")`

RUF055_3.py:17:1: RUF055 [*] Plain string pattern passed to `re` function
|
16 | # Should be replaced with `b_src.split(rb"abc")`
17 | re.split(rb"abc", b_src)
| ^^^^^^^^^^^^^^^^^^^^^^^^ RUF055
18 |
19 | # Patterns containing metacharacters should NOT be replaced
|
= help: Replace with `b_src.split(rb"abc")`

ℹ Safe fix
14 14 | pass
15 15 |
16 16 | # Should be replaced with `b_src.split(rb"abc")`
17 |-re.split(rb"abc", b_src)
17 |+b_src.split(rb"abc")
18 18 |
19 19 | # Patterns containing metacharacters should NOT be replaced
20 20 | re.sub(rb"ab[c]", b"", b_src)
Loading