Skip to content

Commit 753629b

Browse files
committed
Auto merge of rust-lang#13412 - GnomedDev:regex-comp-in-loop, r=y21
Implement lint for regex::Regex compilation inside a loop Closes rust-lang#598. Seems like a pretty simple one, I'm not sure if I sorted out all the lint plumbing correctly because I was adding it to the existing regex pass, but seems to work. The name is a bit jank and I'm super open to suggestions for changing it. changelog: [`regex_creation_in_loops`]: Added lint for Regex compilation inside loops.
2 parents f173eb3 + 196dbcf commit 753629b

File tree

5 files changed

+145
-4
lines changed

5 files changed

+145
-4
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -5874,6 +5874,7 @@ Released 2018-09-13
58745874
[`ref_option`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option
58755875
[`ref_option_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option_ref
58765876
[`ref_patterns`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_patterns
5877+
[`regex_creation_in_loops`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_creation_in_loops
58775878
[`regex_macro`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_macro
58785879
[`renamed_function_params`]: https://rust-lang.github.io/rust-clippy/master/index.html#renamed_function_params
58795880
[`repeat_once`]: https://rust-lang.github.io/rust-clippy/master/index.html#repeat_once

clippy_lints/src/declared_lints.rs

+1
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,7 @@ pub static LINTS: &[&crate::LintInfo] = &[
639639
crate::ref_patterns::REF_PATTERNS_INFO,
640640
crate::reference::DEREF_ADDROF_INFO,
641641
crate::regex::INVALID_REGEX_INFO,
642+
crate::regex::REGEX_CREATION_IN_LOOPS_INFO,
642643
crate::regex::TRIVIAL_REGEX_INFO,
643644
crate::repeat_vec_with_capacity::REPEAT_VEC_WITH_CAPACITY_INFO,
644645
crate::reserve_after_initialization::RESERVE_AFTER_INITIALIZATION_INFO,

clippy_lints/src/regex.rs

+63-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use clippy_utils::source::SpanRangeExt;
66
use clippy_utils::{def_path_res_with_base, find_crates, path_def_id, paths};
77
use rustc_ast::ast::{LitKind, StrStyle};
88
use rustc_hir::def_id::DefIdMap;
9-
use rustc_hir::{BorrowKind, Expr, ExprKind};
9+
use rustc_hir::{BorrowKind, Expr, ExprKind, OwnerId};
1010
use rustc_lint::{LateContext, LateLintPass};
1111
use rustc_session::impl_lint_pass;
1212
use rustc_span::{BytePos, Span};
@@ -55,6 +55,44 @@ declare_clippy_lint! {
5555
"trivial regular expressions"
5656
}
5757

58+
declare_clippy_lint! {
59+
/// ### What it does
60+
///
61+
/// Checks for [regex](https://crates.io/crates/regex) compilation inside a loop with a literal.
62+
///
63+
/// ### Why is this bad?
64+
///
65+
/// Compiling a regex is a much more expensive operation than using one, and a compiled regex can be used multiple times.
66+
/// This is documented as an antipattern [on the regex documentation](https://docs.rs/regex/latest/regex/#avoid-re-compiling-regexes-especially-in-a-loop)
67+
///
68+
/// ### Example
69+
/// ```no_run
70+
/// # let haystacks = [""];
71+
/// # const MY_REGEX: &str = "a.b";
72+
/// for haystack in haystacks {
73+
/// let regex = regex::Regex::new(MY_REGEX).unwrap();
74+
/// if regex.is_match(haystack) {
75+
/// // Perform operation
76+
/// }
77+
/// }
78+
/// ```
79+
/// can be replaced with
80+
/// ```no_run
81+
/// # let haystacks = [""];
82+
/// # const MY_REGEX: &str = "a.b";
83+
/// let regex = regex::Regex::new(MY_REGEX).unwrap();
84+
/// for haystack in haystacks {
85+
/// if regex.is_match(haystack) {
86+
/// // Perform operation
87+
/// }
88+
/// }
89+
/// ```
90+
#[clippy::version = "1.83.0"]
91+
pub REGEX_CREATION_IN_LOOPS,
92+
perf,
93+
"regular expression compilation performed in a loop"
94+
}
95+
5896
#[derive(Copy, Clone)]
5997
enum RegexKind {
6098
Unicode,
@@ -66,9 +104,10 @@ enum RegexKind {
66104
#[derive(Default)]
67105
pub struct Regex {
68106
definitions: DefIdMap<RegexKind>,
107+
loop_stack: Vec<(OwnerId, Span)>,
69108
}
70109

71-
impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX]);
110+
impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX, REGEX_CREATION_IN_LOOPS]);
72111

73112
impl<'tcx> LateLintPass<'tcx> for Regex {
74113
fn check_crate(&mut self, cx: &LateContext<'tcx>) {
@@ -99,12 +138,34 @@ impl<'tcx> LateLintPass<'tcx> for Regex {
99138
&& let Some(def_id) = path_def_id(cx, fun)
100139
&& let Some(regex_kind) = self.definitions.get(&def_id)
101140
{
141+
if let Some(&(loop_item_id, loop_span)) = self.loop_stack.last()
142+
&& loop_item_id == fun.hir_id.owner
143+
&& (matches!(arg.kind, ExprKind::Lit(_)) || const_str(cx, arg).is_some())
144+
{
145+
span_lint_and_help(
146+
cx,
147+
REGEX_CREATION_IN_LOOPS,
148+
fun.span,
149+
"compiling a regex in a loop",
150+
Some(loop_span),
151+
"move the regex construction outside this loop",
152+
);
153+
}
154+
102155
match regex_kind {
103156
RegexKind::Unicode => check_regex(cx, arg, true),
104157
RegexKind::UnicodeSet => check_set(cx, arg, true),
105158
RegexKind::Bytes => check_regex(cx, arg, false),
106159
RegexKind::BytesSet => check_set(cx, arg, false),
107160
}
161+
} else if let ExprKind::Loop(block, _, _, span) = expr.kind {
162+
self.loop_stack.push((block.hir_id.owner, span));
163+
}
164+
}
165+
166+
fn check_expr_post(&mut self, _: &LateContext<'tcx>, expr: &'tcx Expr<'_>) {
167+
if matches!(expr.kind, ExprKind::Loop(..)) {
168+
self.loop_stack.pop();
108169
}
109170
}
110171
}

tests/ui/regex.rs

+29-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
clippy::needless_borrow,
66
clippy::needless_borrows_for_generic_args
77
)]
8-
#![warn(clippy::invalid_regex, clippy::trivial_regex)]
8+
#![warn(clippy::invalid_regex, clippy::trivial_regex, clippy::regex_creation_in_loops)]
99

1010
extern crate regex;
1111

@@ -118,7 +118,35 @@ fn trivial_regex() {
118118
let _ = BRegex::new(r"\b{start}word\b{end}");
119119
}
120120

121+
fn regex_creation_in_loops() {
122+
loop {
123+
static STATIC_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| Regex::new("a.b").unwrap());
124+
125+
let regex = Regex::new("a.b");
126+
//~^ ERROR: compiling a regex in a loop
127+
let regex = BRegex::new("a.b");
128+
//~^ ERROR: compiling a regex in a loop
129+
#[allow(clippy::regex_creation_in_loops)]
130+
let allowed_regex = Regex::new("a.b");
131+
132+
if true {
133+
let regex = Regex::new("a.b");
134+
//~^ ERROR: compiling a regex in a loop
135+
}
136+
137+
for _ in 0..10 {
138+
let nested_regex = Regex::new("a.b");
139+
//~^ ERROR: compiling a regex in a loop
140+
}
141+
}
142+
143+
for i in 0..10 {
144+
let dependant_regex = Regex::new(&format!("{i}"));
145+
}
146+
}
147+
121148
fn main() {
122149
syntax_error();
123150
trivial_regex();
151+
regex_creation_in_loops();
124152
}

tests/ui/regex.stderr

+51-1
Original file line numberDiff line numberDiff line change
@@ -195,5 +195,55 @@ LL | let binary_trivial_empty = BRegex::new("^$");
195195
|
196196
= help: consider using `str::is_empty`
197197

198-
error: aborting due to 24 previous errors
198+
error: compiling a regex in a loop
199+
--> tests/ui/regex.rs:125:21
200+
|
201+
LL | let regex = Regex::new("a.b");
202+
| ^^^^^^^^^^
203+
|
204+
help: move the regex construction outside this loop
205+
--> tests/ui/regex.rs:122:5
206+
|
207+
LL | loop {
208+
| ^^^^
209+
= note: `-D clippy::regex-creation-in-loops` implied by `-D warnings`
210+
= help: to override `-D warnings` add `#[allow(clippy::regex_creation_in_loops)]`
211+
212+
error: compiling a regex in a loop
213+
--> tests/ui/regex.rs:127:21
214+
|
215+
LL | let regex = BRegex::new("a.b");
216+
| ^^^^^^^^^^^
217+
|
218+
help: move the regex construction outside this loop
219+
--> tests/ui/regex.rs:122:5
220+
|
221+
LL | loop {
222+
| ^^^^
223+
224+
error: compiling a regex in a loop
225+
--> tests/ui/regex.rs:133:25
226+
|
227+
LL | let regex = Regex::new("a.b");
228+
| ^^^^^^^^^^
229+
|
230+
help: move the regex construction outside this loop
231+
--> tests/ui/regex.rs:122:5
232+
|
233+
LL | loop {
234+
| ^^^^
235+
236+
error: compiling a regex in a loop
237+
--> tests/ui/regex.rs:138:32
238+
|
239+
LL | let nested_regex = Regex::new("a.b");
240+
| ^^^^^^^^^^
241+
|
242+
help: move the regex construction outside this loop
243+
--> tests/ui/regex.rs:137:9
244+
|
245+
LL | for _ in 0..10 {
246+
| ^^^^^^^^^^^^^^
247+
248+
error: aborting due to 28 previous errors
199249

0 commit comments

Comments
 (0)