diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d253d52531e..cb00807d71a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5874,6 +5874,7 @@ Released 2018-09-13 [`ref_option`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option [`ref_option_ref`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_option_ref [`ref_patterns`]: https://rust-lang.github.io/rust-clippy/master/index.html#ref_patterns +[`regex_creation_in_loops`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_creation_in_loops [`regex_macro`]: https://rust-lang.github.io/rust-clippy/master/index.html#regex_macro [`renamed_function_params`]: https://rust-lang.github.io/rust-clippy/master/index.html#renamed_function_params [`repeat_once`]: https://rust-lang.github.io/rust-clippy/master/index.html#repeat_once diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index 9cec672beb00..44402578f3f5 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -639,6 +639,7 @@ pub static LINTS: &[&crate::LintInfo] = &[ crate::ref_patterns::REF_PATTERNS_INFO, crate::reference::DEREF_ADDROF_INFO, crate::regex::INVALID_REGEX_INFO, + crate::regex::REGEX_CREATION_IN_LOOPS_INFO, crate::regex::TRIVIAL_REGEX_INFO, crate::repeat_vec_with_capacity::REPEAT_VEC_WITH_CAPACITY_INFO, crate::reserve_after_initialization::RESERVE_AFTER_INITIALIZATION_INFO, diff --git a/clippy_lints/src/regex.rs b/clippy_lints/src/regex.rs index 12cbdb854eff..6a5bf1b8045d 100644 --- a/clippy_lints/src/regex.rs +++ b/clippy_lints/src/regex.rs @@ -6,7 +6,7 @@ use clippy_utils::source::SpanRangeExt; use clippy_utils::{def_path_res_with_base, find_crates, path_def_id, paths}; use rustc_ast::ast::{LitKind, StrStyle}; use rustc_hir::def_id::DefIdMap; -use rustc_hir::{BorrowKind, Expr, ExprKind}; +use rustc_hir::{BorrowKind, Expr, ExprKind, OwnerId}; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::impl_lint_pass; use rustc_span::{BytePos, Span}; @@ -55,6 +55,44 @@ declare_clippy_lint! { "trivial regular expressions" } +declare_clippy_lint! { + /// ### What it does + /// + /// Checks for [regex](https://crates.io/crates/regex) compilation inside a loop with a literal. + /// + /// ### Why is this bad? + /// + /// Compiling a regex is a much more expensive operation than using one, and a compiled regex can be used multiple times. + /// This is documented as an antipattern [on the regex documentation](https://docs.rs/regex/latest/regex/#avoid-re-compiling-regexes-especially-in-a-loop) + /// + /// ### Example + /// ```no_run + /// # let haystacks = [""]; + /// # const MY_REGEX: &str = "a.b"; + /// for haystack in haystacks { + /// let regex = regex::Regex::new(MY_REGEX).unwrap(); + /// if regex.is_match(haystack) { + /// // Perform operation + /// } + /// } + /// ``` + /// can be replaced with + /// ```no_run + /// # let haystacks = [""]; + /// # const MY_REGEX: &str = "a.b"; + /// let regex = regex::Regex::new(MY_REGEX).unwrap(); + /// for haystack in haystacks { + /// if regex.is_match(haystack) { + /// // Perform operation + /// } + /// } + /// ``` + #[clippy::version = "1.83.0"] + pub REGEX_CREATION_IN_LOOPS, + perf, + "regular expression compilation performed in a loop" +} + #[derive(Copy, Clone)] enum RegexKind { Unicode, @@ -66,9 +104,10 @@ enum RegexKind { #[derive(Default)] pub struct Regex { definitions: DefIdMap, + loop_stack: Vec<(OwnerId, Span)>, } -impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX]); +impl_lint_pass!(Regex => [INVALID_REGEX, TRIVIAL_REGEX, REGEX_CREATION_IN_LOOPS]); impl<'tcx> LateLintPass<'tcx> for Regex { fn check_crate(&mut self, cx: &LateContext<'tcx>) { @@ -99,12 +138,34 @@ impl<'tcx> LateLintPass<'tcx> for Regex { && let Some(def_id) = path_def_id(cx, fun) && let Some(regex_kind) = self.definitions.get(&def_id) { + if let Some(&(loop_item_id, loop_span)) = self.loop_stack.last() + && loop_item_id == fun.hir_id.owner + && (matches!(arg.kind, ExprKind::Lit(_)) || const_str(cx, arg).is_some()) + { + span_lint_and_help( + cx, + REGEX_CREATION_IN_LOOPS, + fun.span, + "compiling a regex in a loop", + Some(loop_span), + "move the regex construction outside this loop", + ); + } + match regex_kind { RegexKind::Unicode => check_regex(cx, arg, true), RegexKind::UnicodeSet => check_set(cx, arg, true), RegexKind::Bytes => check_regex(cx, arg, false), RegexKind::BytesSet => check_set(cx, arg, false), } + } else if let ExprKind::Loop(block, _, _, span) = expr.kind { + self.loop_stack.push((block.hir_id.owner, span)); + } + } + + fn check_expr_post(&mut self, _: &LateContext<'tcx>, expr: &'tcx Expr<'_>) { + if matches!(expr.kind, ExprKind::Loop(..)) { + self.loop_stack.pop(); } } } diff --git a/tests/ui/regex.rs b/tests/ui/regex.rs index 4fb6c08bb449..f607a2d50c6d 100644 --- a/tests/ui/regex.rs +++ b/tests/ui/regex.rs @@ -5,7 +5,7 @@ clippy::needless_borrow, clippy::needless_borrows_for_generic_args )] -#![warn(clippy::invalid_regex, clippy::trivial_regex)] +#![warn(clippy::invalid_regex, clippy::trivial_regex, clippy::regex_creation_in_loops)] extern crate regex; @@ -118,7 +118,35 @@ fn trivial_regex() { let _ = BRegex::new(r"\b{start}word\b{end}"); } +fn regex_creation_in_loops() { + loop { + static STATIC_REGEX: std::sync::LazyLock = std::sync::LazyLock::new(|| Regex::new("a.b").unwrap()); + + let regex = Regex::new("a.b"); + //~^ ERROR: compiling a regex in a loop + let regex = BRegex::new("a.b"); + //~^ ERROR: compiling a regex in a loop + #[allow(clippy::regex_creation_in_loops)] + let allowed_regex = Regex::new("a.b"); + + if true { + let regex = Regex::new("a.b"); + //~^ ERROR: compiling a regex in a loop + } + + for _ in 0..10 { + let nested_regex = Regex::new("a.b"); + //~^ ERROR: compiling a regex in a loop + } + } + + for i in 0..10 { + let dependant_regex = Regex::new(&format!("{i}")); + } +} + fn main() { syntax_error(); trivial_regex(); + regex_creation_in_loops(); } diff --git a/tests/ui/regex.stderr b/tests/ui/regex.stderr index e936208d8d7b..18dd538c68b4 100644 --- a/tests/ui/regex.stderr +++ b/tests/ui/regex.stderr @@ -195,5 +195,55 @@ LL | let binary_trivial_empty = BRegex::new("^$"); | = help: consider using `str::is_empty` -error: aborting due to 24 previous errors +error: compiling a regex in a loop + --> tests/ui/regex.rs:125:21 + | +LL | let regex = Regex::new("a.b"); + | ^^^^^^^^^^ + | +help: move the regex construction outside this loop + --> tests/ui/regex.rs:122:5 + | +LL | loop { + | ^^^^ + = note: `-D clippy::regex-creation-in-loops` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::regex_creation_in_loops)]` + +error: compiling a regex in a loop + --> tests/ui/regex.rs:127:21 + | +LL | let regex = BRegex::new("a.b"); + | ^^^^^^^^^^^ + | +help: move the regex construction outside this loop + --> tests/ui/regex.rs:122:5 + | +LL | loop { + | ^^^^ + +error: compiling a regex in a loop + --> tests/ui/regex.rs:133:25 + | +LL | let regex = Regex::new("a.b"); + | ^^^^^^^^^^ + | +help: move the regex construction outside this loop + --> tests/ui/regex.rs:122:5 + | +LL | loop { + | ^^^^ + +error: compiling a regex in a loop + --> tests/ui/regex.rs:138:32 + | +LL | let nested_regex = Regex::new("a.b"); + | ^^^^^^^^^^ + | +help: move the regex construction outside this loop + --> tests/ui/regex.rs:137:9 + | +LL | for _ in 0..10 { + | ^^^^^^^^^^^^^^ + +error: aborting due to 28 previous errors