Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ruff] Implement unnecessary-regular-expression (RUF055) #14659

Merged
merged 35 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
d9d365d
add unnecessary regular expression rule
ntBre Nov 27, 2024
f59c38d
add test case
ntBre Nov 27, 2024
2538401
renumber to 055
ntBre Nov 27, 2024
c2d1256
only applies to `re` for now
ntBre Nov 27, 2024
e05d91c
match on nargs too instead of guard
ntBre Nov 27, 2024
1c871e3
lint
ntBre Nov 27, 2024
fa0651f
generate schema
ntBre Nov 27, 2024
6652fe8
restrict the context for suggestions
ntBre Nov 27, 2024
d56c079
update tests
ntBre Nov 27, 2024
9025495
use in_boolean_test
ntBre Nov 28, 2024
39391f4
update summary with more constraints
ntBre Nov 28, 2024
916fd73
generate Exprs instead of using format
ntBre Nov 28, 2024
ef1da87
rename to avoid double negation
ntBre Nov 28, 2024
fa16e9e
use locate_arg
ntBre Nov 28, 2024
ce3fb1a
check that additional arguments prevent the rule
ntBre Nov 28, 2024
0c10156
remove extra newline
ntBre Nov 28, 2024
bc04c1f
use str::contains instead of chars::any
ntBre Nov 28, 2024
57d8bc1
as_ref to &
ntBre Nov 28, 2024
c170945
retrieve pat from re_func now
ntBre Nov 28, 2024
265a4d0
inline locate_arg
ntBre Nov 28, 2024
e66bced
fmt
ntBre Nov 28, 2024
ec3346f
add brief summary and move details to the end
ntBre Nov 28, 2024
046816a
from_call_expr only needs SemanticModel, not Checker
ntBre Nov 28, 2024
6d8e9b1
move unnecessary_regular_expression to the top
ntBre Nov 28, 2024
421b30a
check safety based on comments, add test case, add docs
ntBre Nov 28, 2024
27773b8
test all metacharacters and raw strings
ntBre Nov 28, 2024
a3f7cd4
update snapshot, unsafe fix moved down in the file
ntBre Nov 28, 2024
31887d5
use with_fix
ntBre Nov 28, 2024
742a936
inline nargs
ntBre Nov 28, 2024
2639ac2
add reference
ntBre Nov 28, 2024
1095362
use Fix::applicable_edit
ntBre Nov 28, 2024
70e1150
improve docs
ntBre Nov 28, 2024
11a7f40
tidy applicable_edit
ntBre Nov 28, 2024
1ad19a8
remove }, ], and ) from metacharacters
ntBre Nov 28, 2024
cc5f75e
extra s
ntBre Nov 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/ruff/RUF055.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,31 @@ def dashrepl(matchobj):
re.fullmatch("ab[c]", s)
re.split("ab[c]", s)

# test that all of the metacharacters prevent the rule from triggering, also
# use raw strings in line with RUF039
re.sub(r"abc.", "", s)
re.sub(r"^abc", "", s)
re.sub(r"abc$", "", s)
re.sub(r"abc*", "", s)
re.sub(r"abc+", "", s)
re.sub(r"abc?", "", s)
re.sub(r"abc{2,3}", "", s)
re.sub(r"abc\n", "", s) # this one could be fixed but is not currently
re.sub(r"abc|def", "", s)
re.sub(r"(a)bc", "", s)

# and these should not be modified because they have extra arguments
re.sub("abc", "", s, flags=re.A)
re.match("abc", s, flags=re.I)
re.search("abc", s, flags=re.L)
re.fullmatch("abc", s, flags=re.M)
re.split("abc", s, maxsplit=2)
AlexWaygood marked this conversation as resolved.
Show resolved Hide resolved

# this should trigger an unsafe fix because of the presence of comments
re.sub(
# pattern
"abc",
# repl
"",
s, # string
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,14 @@ use ruff_macros::{derive_message_formats, ViolationMetadata};
use ruff_python_ast::{
Arguments, CmpOp, Expr, ExprAttribute, ExprCall, ExprCompare, ExprContext, Identifier,
};
use ruff_python_semantic::Modules;
use ruff_python_semantic::{Modules, SemanticModel};
use ruff_text_size::TextRange;

use crate::checkers::ast::Checker;

/// ## What it does
///
/// Reports the following `re` calls when their first arguments are plain string
/// literals, and no additional flags are passed:
///
/// - `sub`
/// - `match`
/// - `search`
/// - `fullmatch`
/// - `split`
///
/// For `sub`, the `repl` (replacement) argument must also be a string literal,
/// not a function. For `match`, `search`, and `fullmatch`, the return value
/// must also be used only for its truth value.
/// Checks for uses of the `re` module that can be replaced with builtin `str` methods.
///
/// ## Why is this bad?
///
Expand All @@ -39,6 +28,26 @@ use crate::checkers::ast::Checker;
/// ```python
/// s.replace("abc", "")
/// ```
///
/// ## Details
///
/// Reports the following `re` calls when their first arguments are plain string
/// literals, and no additional flags are passed:
///
/// - `sub`
/// - `match`
/// - `search`
/// - `fullmatch`
/// - `split`
///
/// For `sub`, the `repl` (replacement) argument must also be a string literal,
/// not a function. For `match`, `search`, and `fullmatch`, the return value
/// must also be used only for its truth value.
ntBre marked this conversation as resolved.
Show resolved Hide resolved
///
/// ## Fix safety
///
/// This rule's fix is marked as unsafe if the affected expression contains comments. Otherwise,
/// the fix can be applied safely.
ntBre marked this conversation as resolved.
Show resolved Hide resolved
#[derive(ViolationMetadata)]
pub(crate) struct UnnecessaryRegularExpression {
replacement: String,
Expand All @@ -55,6 +64,71 @@ impl AlwaysFixableViolation for UnnecessaryRegularExpression {
}
}

/// RUF055
pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprCall) {
// adapted from unraw_re_pattern
let semantic = checker.semantic();

if !semantic.seen_module(Modules::RE) {
return;
}

let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) else {
return;
};

let ["re", func] = qualified_name.segments() else {
return;
};

// skip calls with more than `pattern` and `string` arguments (and `repl`
// for `sub`)
let Some(re_func) = ReFunc::from_call_expr(semantic, call, func) else {
return;
};

// For now, restrict this rule to string literals
let Some(string_lit) = re_func.pattern.as_string_literal_expr() else {
return;
};

// For now, reject any regex metacharacters. Compare to the complete list
// from https://docs.python.org/3/howto/regex.html#matching-characters
let has_metacharacters = string_lit.value.to_str().contains([
'.', '^', '$', '*', '+', '?', '{', '}', '[', ']', '\\', '|', '(', ')',
AlexWaygood marked this conversation as resolved.
Show resolved Hide resolved
]);

if has_metacharacters {
return;
}

// Here we know the pattern is a string literal with no metacharacters, so
// we can proceed with the str method replacement
let new_expr = re_func.replacement();

let repl = checker.generator().expr(&new_expr);
let mut diagnostic = Diagnostic::new(
UnnecessaryRegularExpression {
replacement: repl.clone(),
},
call.range,
);

let edit = Edit::range_replacement(repl, call.range);
let fix = if checker
.comment_ranges()
.has_comments(call, checker.source())
{
Fix::unsafe_edit(edit)
} else {
Fix::safe_edit(edit)
};
ntBre marked this conversation as resolved.
Show resolved Hide resolved

diagnostic.set_fix(fix);

checker.diagnostics.push(diagnostic);
ntBre marked this conversation as resolved.
Show resolved Hide resolved
}

/// The `re` functions supported by this rule.
#[derive(Debug)]
enum ReFuncKind<'a> {
Expand All @@ -74,53 +148,52 @@ struct ReFunc<'a> {

impl<'a> ReFunc<'a> {
fn from_call_expr(
checker: &'a mut Checker,
semantic: &SemanticModel,
call: &'a ExprCall,
func_name: &str,
) -> Option<Self> {
let nargs = call.arguments.len();
let locate_arg = |name, position| call.arguments.find_argument(name, position);

// the proposed fixes for match, search, and fullmatch rely on the
// return value only being used for its truth value
let in_if_context = checker.semantic().in_boolean_test();
let in_if_context = semantic.in_boolean_test();

match (func_name, nargs) {
// `split` is the safest of these to fix, as long as metacharacters
// have already been filtered out from the `pattern`
("split", 2) => Some(ReFunc {
kind: ReFuncKind::Split,
pattern: locate_arg("pattern", 0)?,
string: locate_arg("string", 1)?,
pattern: call.arguments.find_argument("pattern", 0)?,
string: call.arguments.find_argument("string", 1)?,
}),
// `sub` is only safe to fix if `repl` is a string. `re.sub` also
// allows it to be a function, which will *not* work in the str
// version
("sub", 3) => {
let repl = locate_arg("repl", 1)?;
let repl = call.arguments.find_argument("repl", 1)?;
if !repl.is_string_literal_expr() {
return None;
}
Some(ReFunc {
kind: ReFuncKind::Sub { repl },
pattern: locate_arg("pattern", 0)?,
string: locate_arg("string", 2)?,
pattern: call.arguments.find_argument("pattern", 0)?,
string: call.arguments.find_argument("string", 2)?,
})
}
("match", 2) if in_if_context => Some(ReFunc {
kind: ReFuncKind::Match,
pattern: locate_arg("pattern", 0)?,
string: locate_arg("string", 1)?,
pattern: call.arguments.find_argument("pattern", 0)?,
string: call.arguments.find_argument("string", 1)?,
}),
("search", 2) if in_if_context => Some(ReFunc {
kind: ReFuncKind::Search,
pattern: locate_arg("pattern", 0)?,
string: locate_arg("string", 1)?,
pattern: call.arguments.find_argument("pattern", 0)?,
string: call.arguments.find_argument("string", 1)?,
}),
("fullmatch", 2) if in_if_context => Some(ReFunc {
kind: ReFuncKind::Fullmatch,
pattern: locate_arg("pattern", 0)?,
string: locate_arg("string", 1)?,
pattern: call.arguments.find_argument("pattern", 0)?,
string: call.arguments.find_argument("string", 1)?,
}),
_ => None,
}
Expand Down Expand Up @@ -173,66 +246,3 @@ impl<'a> ReFunc<'a> {
})
}
}

/// RUF055
pub(crate) fn unnecessary_regular_expression(checker: &mut Checker, call: &ExprCall) {
// adapted from unraw_re_pattern
let semantic = checker.semantic();

if !semantic.seen_module(Modules::RE) {
return;
}

let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) else {
return;
};

let ["re", func] = qualified_name.segments() else {
return;
};

// skip calls with more than `pattern` and `string` arguments (and `repl`
// for `sub`)
let Some(re_func) = ReFunc::from_call_expr(checker, call, func) else {
return;
};

let Some(pat) = call.arguments.find_argument("pattern", 0) else {
// this should be unreachable given the checks above, so it might be
// safe to unwrap here instead
return;
};

// For now, restrict this rule to string literals
let Some(string_lit) = pat.as_string_literal_expr() else {
return;
};

// For now, reject any regex metacharacters. Compare to the complete list
// from https://docs.python.org/3/howto/regex.html#matching-characters
let has_metacharacters = string_lit.value.to_str().contains(['.', '^', '$', '*', '+', '?', '{', '}', '[', ']', '\\', '|', '(', ')']);

if has_metacharacters {
return;
}

// Here we know the pattern is a string literal with no metacharacters, so
// we can proceed with the str method replacement
let new_expr = re_func.replacement();

let repl = checker.generator().expr(&new_expr);
let mut diagnostic = Diagnostic::new(
UnnecessaryRegularExpression {
replacement: repl.clone(),
},
call.range,
);

diagnostic.set_fix(Fix::safe_edit(Edit::replacement(
repl,
call.range.start(),
call.range.end(),
)));

checker.diagnostics.push(diagnostic);
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,30 @@ RUF055.py:39:1: RUF055 [*] Plain string pattern passed to `re` function
40 40 |
41 41 | # these currently should not be modified because the patterns contain regex
42 42 | # metacharacters

RUF055.py:70:1: RUF055 [*] Plain string pattern passed to `re` function
|
69 | # this should trigger an unsafe fix because of the presence of comments
70 | / re.sub(
71 | | # pattern
72 | | "abc",
73 | | # repl
74 | | "",
75 | | s, # string
76 | | )
| |_^ RUF055
|
= help: Replace with `s.replace("abc", "")`

ℹ Unsafe fix
67 67 | re.split("abc", s, maxsplit=2)
68 68 |
69 69 | # this should trigger an unsafe fix because of the presence of comments
70 |-re.sub(
71 |- # pattern
72 |- "abc",
73 |- # repl
74 |- "",
75 |- s, # string
76 |-)
70 |+s.replace("abc", "")