Skip to content

Commit 1440041

Browse files
committed
impl: drop thread_local dependency
This commit removes the thread_local dependency (even as an optional dependency) and replaces it with a more purpose-driven memory pool. The comments in src/pool.rs explain this in more detail, but the short story is that thread_local seems to be at the root of some memory leaks happening in certain usage scenarios. The great thing about thread_local though is how fast it is. Using a simple Mutex<Vec<T>> is easily at least twice as slow. We work around that a bit by coding a simplistic fast path for the "owner" of a pool. This does require one new use of `unsafe`, which we document extensively. This now makes the 'perf-cache' feature a no-op. We of course retain it for compatibility purposes (and perhaps it will be used again in the future), but for now, we always use the same pool. As for benchmarks, it is likely that *some* cases will get a hair slower. But there shouldn't be any dramatic difference. A careful review of micro-benchmarks in addition to more holistic (albeit ad hoc) benchmarks via ripgrep seems to confirm this. Now that we have more explicit control over the memory pool, we also clean stuff up with respect to RefUnwindSafe. Fixes #362, Fixes #576 Ref BurntSushi/rure-go#3
1 parent 83d2452 commit 1440041

File tree

6 files changed

+392
-128
lines changed

6 files changed

+392
-128
lines changed

Diff for: Cargo.toml

+4-6
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ use_std = ["std"]
5151
# Enables all performance features.
5252
perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"]
5353
# Enables fast caching. (If disabled, caching is still used, but is slower.)
54-
perf-cache = ["thread_local"]
54+
# Currently, this feature has no effect. Disabling it used to remove the thread_local
55+
# dependency and use a slower internal cache, but now the default cache has
56+
# been improved and thread_local is no longer a dependency at all.
57+
perf-cache = []
5558
# Enables use of a lazy DFA when possible.
5659
perf-dfa = []
5760
# Enables aggressive use of inlining.
@@ -110,11 +113,6 @@ optional = true
110113
version = "2.2.1"
111114
optional = true
112115

113-
# For managing regex caches quickly across multiple threads.
114-
[dependencies.thread_local]
115-
version = "1"
116-
optional = true
117-
118116
# For parsing regular expressions.
119117
[dependencies.regex-syntax]
120118
path = "regex-syntax"

Diff for: src/cache.rs

-102
This file was deleted.

Diff for: src/exec.rs

+24-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::cell::RefCell;
22
use std::collections::HashMap;
3+
use std::panic::AssertUnwindSafe;
34
use std::sync::Arc;
45

56
#[cfg(feature = "perf-literal")]
@@ -9,14 +10,14 @@ use syntax::hir::Hir;
910
use syntax::ParserBuilder;
1011

1112
use backtrack;
12-
use cache::{Cached, CachedGuard};
1313
use compile::Compiler;
1414
#[cfg(feature = "perf-dfa")]
1515
use dfa;
1616
use error::Error;
1717
use input::{ByteInput, CharInput};
1818
use literal::LiteralSearcher;
1919
use pikevm;
20+
use pool::{Pool, PoolGuard};
2021
use prog::Program;
2122
use re_builder::RegexOptions;
2223
use re_bytes;
@@ -34,8 +35,8 @@ use utf8::next_utf8;
3435
pub struct Exec {
3536
/// All read only state.
3637
ro: Arc<ExecReadOnly>,
37-
/// Caches for the various matching engines.
38-
cache: Cached<ProgramCache>,
38+
/// A pool of reusable values for the various matching engines.
39+
pool: Pool<ProgramCache>,
3940
}
4041

4142
/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
@@ -46,7 +47,7 @@ pub struct ExecNoSync<'c> {
4647
/// All read only state.
4748
ro: &'c Arc<ExecReadOnly>,
4849
/// Caches for the various matching engines.
49-
cache: CachedGuard<'c, ProgramCache>,
50+
cache: PoolGuard<'c, ProgramCache>,
5051
}
5152

5253
/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
@@ -302,7 +303,8 @@ impl ExecBuilder {
302303
ac: None,
303304
match_type: MatchType::Nothing,
304305
});
305-
return Ok(Exec { ro: ro, cache: Cached::new() });
306+
let pool = ExecReadOnly::new_pool(&ro);
307+
return Ok(Exec { ro: ro, pool });
306308
}
307309
let parsed = self.parse()?;
308310
let mut nfa = Compiler::new()
@@ -342,7 +344,8 @@ impl ExecBuilder {
342344
ro.match_type = ro.choose_match_type(self.match_type);
343345

344346
let ro = Arc::new(ro);
345-
Ok(Exec { ro: ro, cache: Cached::new() })
347+
let pool = ExecReadOnly::new_pool(&ro);
348+
Ok(Exec { ro, pool })
346349
}
347350

348351
#[cfg(feature = "perf-literal")]
@@ -1254,10 +1257,9 @@ impl Exec {
12541257
/// Get a searcher that isn't Sync.
12551258
#[cfg_attr(feature = "perf-inline", inline(always))]
12561259
pub fn searcher(&self) -> ExecNoSync {
1257-
let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
12581260
ExecNoSync {
12591261
ro: &self.ro, // a clone is too expensive here! (and not needed)
1260-
cache: self.cache.get_or(create),
1262+
cache: self.pool.get(),
12611263
}
12621264
}
12631265

@@ -1309,7 +1311,8 @@ impl Exec {
13091311

13101312
impl Clone for Exec {
13111313
fn clone(&self) -> Exec {
1312-
Exec { ro: self.ro.clone(), cache: Cached::new() }
1314+
let pool = ExecReadOnly::new_pool(&self.ro);
1315+
Exec { ro: self.ro.clone(), pool }
13131316
}
13141317
}
13151318

@@ -1442,6 +1445,13 @@ impl ExecReadOnly {
14421445
let lcs_len = self.suffixes.lcs().char_len();
14431446
lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len()
14441447
}
1448+
1449+
fn new_pool(ro: &Arc<ExecReadOnly>) -> Pool<ProgramCache> {
1450+
let ro = ro.clone();
1451+
Pool::new(Box::new(move || {
1452+
AssertUnwindSafe(RefCell::new(ProgramCacheInner::new(&ro)))
1453+
}))
1454+
}
14451455
}
14461456

14471457
#[derive(Clone, Copy, Debug)]
@@ -1500,7 +1510,11 @@ enum MatchNfaType {
15001510

15011511
/// `ProgramCache` maintains reusable allocations for each matching engine
15021512
/// available to a particular program.
1503-
pub type ProgramCache = RefCell<ProgramCacheInner>;
1513+
///
1514+
/// We declare this as unwind safe since it's a cache that's only used for
1515+
/// performance purposes. If a panic occurs, it is (or should be) always safe
1516+
/// to continue using the same regex object.
1517+
pub type ProgramCache = AssertUnwindSafe<RefCell<ProgramCacheInner>>;
15041518

15051519
#[derive(Debug)]
15061520
pub struct ProgramCacheInner {

Diff for: src/lib.rs

+6-8
Original file line numberDiff line numberDiff line change
@@ -523,11 +523,6 @@ All features below are enabled by default.
523523
Enables all performance related features. This feature is enabled by default
524524
and will always cover all features that improve performance, even if more
525525
are added in the future.
526-
* **perf-cache** -
527-
Enables the use of very fast thread safe caching for internal match state.
528-
When this is disabled, caching is still used, but with a slower and simpler
529-
implementation. Disabling this drops the `thread_local` and `lazy_static`
530-
dependencies.
531526
* **perf-dfa** -
532527
Enables the use of a lazy DFA for matching. The lazy DFA is used to compile
533528
portions of a regex to a very fast DFA on an as-needed basis. This can
@@ -542,6 +537,11 @@ All features below are enabled by default.
542537
Enables the use of literal optimizations for speeding up matches. In some
543538
cases, literal optimizations can result in speedups of _several_ orders of
544539
magnitude. Disabling this drops the `aho-corasick` and `memchr` dependencies.
540+
* **perf-cache** -
541+
This feature used to enable a faster internal cache at the cost of using
542+
additional dependencies, but this is no longer an option. A fast internal
543+
cache is now used unconditionally with no additional dependencies. This may
544+
change in the future.
545545
546546
### Unicode features
547547
@@ -631,8 +631,6 @@ extern crate memchr;
631631
#[cfg_attr(feature = "perf-literal", macro_use)]
632632
extern crate quickcheck;
633633
extern crate regex_syntax as syntax;
634-
#[cfg(feature = "perf-cache")]
635-
extern crate thread_local;
636634

637635
// #[cfg(doctest)]
638636
// doc_comment::doctest!("../README.md");
@@ -749,7 +747,6 @@ pub mod bytes {
749747
}
750748

751749
mod backtrack;
752-
mod cache;
753750
mod compile;
754751
#[cfg(feature = "perf-dfa")]
755752
mod dfa;
@@ -764,6 +761,7 @@ mod literal;
764761
#[cfg(feature = "pattern")]
765762
mod pattern;
766763
mod pikevm;
764+
mod pool;
767765
mod prog;
768766
mod re_builder;
769767
mod re_bytes;

0 commit comments

Comments
 (0)