Skip to content

Commit 05659de

Browse files
committed
regex: support perf-cache
This makes the thread_local (and by consequence, lazy_static) crates optional by providing a naive caching mechanism when perf-cache is disabled. This is achieved by defining a common API and implementing it via both approaches. The one tricky bit here is to ensure our naive version implements the same auto-traits as the fast version. Since we just use a plain mutex, it impls RefUnwindSafe, but thread_local does not. So we forcefully remove the RefUnwindSafe impl from our safe variant. We should be able to implement RefUnwindSafe in both cases, but this likely requires some mechanism for clearing the regex cache automatically if a panic occurs anywhere during search. But that's a more invasive change and is part of #576.
1 parent 365ef55 commit 05659de

File tree

3 files changed

+122
-21
lines changed

3 files changed

+122
-21
lines changed

src/cache.rs

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// This module defines a common API for caching internal runtime state.
2+
// The `thread_local` crate provides an extremely optimized version of this.
3+
// However, if the perf-cache feature is disabled, then we drop the
4+
// thread_local dependency and instead use a pretty naive caching mechanism
5+
// with a mutex.
6+
//
7+
// Strictly speaking, the CachedGuard isn't necessary for the much more
8+
// flexible thread_local API, but implementing thread_local's API doesn't
9+
// seem possible in purely safe code.
10+
11+
pub use self::imp::{Cached, CachedGuard};
12+
13+
#[cfg(feature = "perf-cache")]
14+
mod imp {
15+
use thread_local::CachedThreadLocal;
16+
17+
#[derive(Debug)]
18+
pub struct Cached<T: Send>(CachedThreadLocal<T>);
19+
20+
#[derive(Debug)]
21+
pub struct CachedGuard<'a, T: 'a>(&'a T);
22+
23+
impl<T: Send> Cached<T> {
24+
pub fn new() -> Cached<T> {
25+
Cached(CachedThreadLocal::new())
26+
}
27+
28+
pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
29+
CachedGuard(self.0.get_or(|| Box::new(create())))
30+
}
31+
}
32+
33+
impl<'a, T: Send> CachedGuard<'a, T> {
34+
pub fn value(&self) -> &T {
35+
self.0
36+
}
37+
}
38+
}
39+
40+
#[cfg(not(feature = "perf-cache"))]
41+
mod imp {
42+
use std::marker::PhantomData;
43+
use std::panic::UnwindSafe;
44+
use std::sync::Mutex;
45+
46+
#[derive(Debug)]
47+
pub struct Cached<T: Send> {
48+
stack: Mutex<Vec<T>>,
49+
/// When perf-cache is enabled, the thread_local crate is used, and
50+
/// its CachedThreadLocal impls Send, Sync and UnwindSafe, but NOT
51+
/// RefUnwindSafe. However, a Mutex impls RefUnwindSafe. So in order
52+
/// to keep the APIs consistent regardless of whether perf-cache is
53+
/// enabled, we force this type to NOT impl RefUnwindSafe too.
54+
///
55+
/// Ideally, we should always impl RefUnwindSafe, but it seems a little
56+
/// tricky to do that right now.
57+
///
58+
/// See also: https://github.com/rust-lang/regex/issues/576
59+
_phantom: PhantomData<Box<Send + Sync + UnwindSafe>>,
60+
}
61+
62+
#[derive(Debug)]
63+
pub struct CachedGuard<'a, T: Send> {
64+
cache: &'a Cached<T>,
65+
value: Option<T>,
66+
}
67+
68+
impl<T: Send> Cached<T> {
69+
pub fn new() -> Cached<T> {
70+
Cached { stack: Mutex::new(vec![]), _phantom: PhantomData }
71+
}
72+
73+
pub fn get_or(&self, create: impl FnOnce() -> T) -> CachedGuard<T> {
74+
let mut stack = self.stack.lock().unwrap();
75+
match stack.pop() {
76+
None => CachedGuard { cache: self, value: Some(create()) },
77+
Some(value) => CachedGuard { cache: self, value: Some(value) },
78+
}
79+
}
80+
81+
fn put(&self, value: T) {
82+
let mut stack = self.stack.lock().unwrap();
83+
stack.push(value);
84+
}
85+
}
86+
87+
impl<'a, T: Send> CachedGuard<'a, T> {
88+
pub fn value(&self) -> &T {
89+
self.value.as_ref().unwrap()
90+
}
91+
}
92+
93+
impl<'a, T: Send> Drop for CachedGuard<'a, T> {
94+
fn drop(&mut self) {
95+
if let Some(value) = self.value.take() {
96+
self.cache.put(value);
97+
}
98+
}
99+
}
100+
}

src/exec.rs

+20-21
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
66
use syntax::hir::literal::Literals;
77
use syntax::hir::Hir;
88
use syntax::ParserBuilder;
9-
use thread_local::CachedThreadLocal;
109

1110
use backtrack;
11+
use cache::{Cached, CachedGuard};
1212
use compile::Compiler;
1313
use dfa;
1414
use error::Error;
@@ -32,7 +32,7 @@ pub struct Exec {
3232
/// All read only state.
3333
ro: Arc<ExecReadOnly>,
3434
/// Caches for the various matching engines.
35-
cache: CachedThreadLocal<ProgramCache>,
35+
cache: Cached<ProgramCache>,
3636
}
3737

3838
/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
@@ -43,7 +43,7 @@ pub struct ExecNoSync<'c> {
4343
/// All read only state.
4444
ro: &'c Arc<ExecReadOnly>,
4545
/// Caches for the various matching engines.
46-
cache: &'c ProgramCache,
46+
cache: CachedGuard<'c, ProgramCache>,
4747
}
4848

4949
/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
@@ -291,7 +291,7 @@ impl ExecBuilder {
291291
ac: None,
292292
match_type: MatchType::Nothing,
293293
});
294-
return Ok(Exec { ro: ro, cache: CachedThreadLocal::new() });
294+
return Ok(Exec { ro: ro, cache: Cached::new() });
295295
}
296296
let parsed = self.parse()?;
297297
let mut nfa = Compiler::new()
@@ -347,7 +347,7 @@ impl ExecBuilder {
347347
ro.match_type = ro.choose_match_type(self.match_type);
348348

349349
let ro = Arc::new(ro);
350-
Ok(Exec { ro: ro, cache: CachedThreadLocal::new() })
350+
Ok(Exec { ro: ro, cache: Cached::new() })
351351
}
352352
}
353353

@@ -423,7 +423,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
423423
MatchType::DfaAnchoredReverse => {
424424
match dfa::Fsm::reverse(
425425
&self.ro.dfa_reverse,
426-
self.cache,
426+
self.cache.value(),
427427
true,
428428
&text[start..],
429429
text.len(),
@@ -471,7 +471,7 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
471471
MatchType::DfaAnchoredReverse => {
472472
match dfa::Fsm::reverse(
473473
&self.ro.dfa_reverse,
474-
self.cache,
474+
self.cache.value(),
475475
true,
476476
&text[start..],
477477
text.len(),
@@ -691,7 +691,7 @@ impl<'c> ExecNoSync<'c> {
691691
use dfa::Result::*;
692692
let end = match dfa::Fsm::forward(
693693
&self.ro.dfa,
694-
self.cache,
694+
self.cache.value(),
695695
false,
696696
text,
697697
start,
@@ -704,7 +704,7 @@ impl<'c> ExecNoSync<'c> {
704704
// Now run the DFA in reverse to find the start of the match.
705705
match dfa::Fsm::reverse(
706706
&self.ro.dfa_reverse,
707-
self.cache,
707+
self.cache.value(),
708708
false,
709709
&text[start..],
710710
end - start,
@@ -730,7 +730,7 @@ impl<'c> ExecNoSync<'c> {
730730
use dfa::Result::*;
731731
match dfa::Fsm::reverse(
732732
&self.ro.dfa_reverse,
733-
self.cache,
733+
self.cache.value(),
734734
false,
735735
&text[start..],
736736
text.len() - start,
@@ -744,7 +744,7 @@ impl<'c> ExecNoSync<'c> {
744744
/// Finds the end of the shortest match using only the DFA.
745745
#[cfg_attr(feature = "perf-inline", inline(always))]
746746
fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
747-
dfa::Fsm::forward(&self.ro.dfa, self.cache, true, text, start)
747+
dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start)
748748
}
749749

750750
/// Finds the end of the shortest match using only the DFA by scanning for
@@ -796,7 +796,7 @@ impl<'c> ExecNoSync<'c> {
796796
end = last_literal + lcs.len();
797797
match dfa::Fsm::reverse(
798798
&self.ro.dfa_reverse,
799-
self.cache,
799+
self.cache.value(),
800800
false,
801801
&text[start..end],
802802
end - start,
@@ -841,7 +841,7 @@ impl<'c> ExecNoSync<'c> {
841841
// leftmost-first match.)
842842
match dfa::Fsm::forward(
843843
&self.ro.dfa,
844-
self.cache,
844+
self.cache.value(),
845845
false,
846846
text,
847847
match_start,
@@ -1007,7 +1007,7 @@ impl<'c> ExecNoSync<'c> {
10071007
if self.ro.nfa.uses_bytes() {
10081008
pikevm::Fsm::exec(
10091009
&self.ro.nfa,
1010-
self.cache,
1010+
self.cache.value(),
10111011
matches,
10121012
slots,
10131013
quit_after_match,
@@ -1018,7 +1018,7 @@ impl<'c> ExecNoSync<'c> {
10181018
} else {
10191019
pikevm::Fsm::exec(
10201020
&self.ro.nfa,
1021-
self.cache,
1021+
self.cache.value(),
10221022
matches,
10231023
slots,
10241024
quit_after_match,
@@ -1041,7 +1041,7 @@ impl<'c> ExecNoSync<'c> {
10411041
if self.ro.nfa.uses_bytes() {
10421042
backtrack::Bounded::exec(
10431043
&self.ro.nfa,
1044-
self.cache,
1044+
self.cache.value(),
10451045
matches,
10461046
slots,
10471047
ByteInput::new(text, self.ro.nfa.only_utf8),
@@ -1051,7 +1051,7 @@ impl<'c> ExecNoSync<'c> {
10511051
} else {
10521052
backtrack::Bounded::exec(
10531053
&self.ro.nfa,
1054-
self.cache,
1054+
self.cache.value(),
10551055
matches,
10561056
slots,
10571057
CharInput::new(text),
@@ -1087,7 +1087,7 @@ impl<'c> ExecNoSync<'c> {
10871087
Dfa | DfaAnchoredReverse | DfaSuffix | DfaMany => {
10881088
match dfa::Fsm::forward_many(
10891089
&self.ro.dfa,
1090-
self.cache,
1090+
self.cache.value(),
10911091
matches,
10921092
text,
10931093
start,
@@ -1145,8 +1145,7 @@ impl Exec {
11451145
/// Get a searcher that isn't Sync.
11461146
#[cfg_attr(feature = "perf-inline", inline(always))]
11471147
pub fn searcher(&self) -> ExecNoSync {
1148-
let create =
1149-
|| Box::new(RefCell::new(ProgramCacheInner::new(&self.ro)));
1148+
let create = || RefCell::new(ProgramCacheInner::new(&self.ro));
11501149
ExecNoSync {
11511150
ro: &self.ro, // a clone is too expensive here! (and not needed)
11521151
cache: self.cache.get_or(create),
@@ -1201,7 +1200,7 @@ impl Exec {
12011200

12021201
impl Clone for Exec {
12031202
fn clone(&self) -> Exec {
1204-
Exec { ro: self.ro.clone(), cache: CachedThreadLocal::new() }
1203+
Exec { ro: self.ro.clone(), cache: Cached::new() }
12051204
}
12061205
}
12071206

src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,7 @@ compile_error!("`std` feature is currently required to build this crate");
621621

622622
extern crate aho_corasick;
623623
extern crate memchr;
624+
#[cfg(feature = "perf-cache")]
624625
extern crate thread_local;
625626
#[cfg(test)]
626627
#[macro_use]
@@ -744,6 +745,7 @@ pub mod bytes {
744745
}
745746

746747
mod backtrack;
748+
mod cache;
747749
mod compile;
748750
mod dfa;
749751
mod error;

0 commit comments

Comments
 (0)