-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add microbenchmarks to lib/datadog/grok
With reference to #10144 and in light of #11849 we now understand that http -> pipelines -> blackhole is significantly bottlenecked in datadog-grok. Unfortunately, most of our data indicates that regex is the prime pain point. This commit does two things. First, it introduces micro-benchmarks for `datadog_grok::filters::keyvalue::apply_filter`, which unfortunately requires exposing `datadog_grok::filters` from the crate so we can benchmark it. Second, it improves the performance of said function by 40% in the micro-benchmark when there is a field delimiter in place. Specifically, we remove the need for nom-regex and avoid cloning a `regex::Regex` instance for each key and each value in a field. Signed-off-by: Brian L. Troutwine <brian@troutwine.us>
- Loading branch information
Showing
7 changed files
with
127 additions
and
24 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
use std::time::Duration; | ||
|
||
use bytes::Bytes; | ||
use criterion::{ | ||
criterion_group, measurement::WallTime, BatchSize, BenchmarkGroup, Criterion, SamplingMode, | ||
}; | ||
use datadog_grok::filters::keyvalue::{apply_filter, KeyValueFilter}; | ||
use regex::Regex; | ||
use value::Value; | ||
|
||
fn apply_filter_bench(c: &mut Criterion) { | ||
let mut group: BenchmarkGroup<WallTime> = | ||
c.benchmark_group("datadog_grok::filters::keyvalue::apply_filter"); | ||
group.sampling_mode(SamplingMode::Auto); | ||
|
||
group.bench_function("apply_filter key=valueStr", move |b| { | ||
b.iter_batched( | ||
|| { | ||
let value = Value::Bytes(Bytes::from("key=valueStr")); | ||
let filter = KeyValueFilter { | ||
key_value_delimiter: "=".into(), | ||
value_re: Regex::new(r"^[\w.\-_@]+").unwrap(), | ||
quotes: vec![('"', '"'), ('\'', '\''), ('<', '>')], | ||
field_delimiters: [" ", ",", ";"] | ||
.iter() | ||
.map(|x| String::from(*x)) | ||
.collect::<Vec<String>>(), | ||
}; | ||
(value, filter) | ||
}, | ||
|(value, filter): (Value, KeyValueFilter)| { | ||
let _ = apply_filter(&value, &filter); | ||
}, | ||
BatchSize::SmallInput, | ||
) | ||
}); | ||
|
||
group.bench_function("apply_filter key1=value1|key2=value2", move |b| { | ||
b.iter_batched( | ||
|| { | ||
let value = Value::Bytes(Bytes::from("key1=value1|key2=value2")); | ||
let filter = KeyValueFilter { | ||
key_value_delimiter: "=".into(), | ||
value_re: Regex::new(r"^[\w.\-_@]+").unwrap(), | ||
quotes: vec![('"', '"'), ('\'', '\''), ('<', '>')], | ||
field_delimiters: ["|"] | ||
.iter() | ||
.map(|x| String::from(*x)) | ||
.collect::<Vec<String>>(), | ||
}; | ||
(value, filter) | ||
}, | ||
|(value, filter): (Value, KeyValueFilter)| { | ||
let _ = apply_filter(&value, &filter); | ||
}, | ||
BatchSize::SmallInput, | ||
) | ||
}); | ||
} | ||
|
||
criterion_group!( | ||
name = benches; | ||
config = Criterion::default() | ||
.warm_up_time(Duration::from_secs(5)) | ||
.measurement_time(Duration::from_secs(120)) | ||
// degree of noise to ignore in measurements, here 1% | ||
.noise_threshold(0.01) | ||
// likelihood of noise registering as difference, here 5% | ||
.significance_level(0.05) | ||
// likelihood of capturing the true runtime, here 95% | ||
.confidence_level(0.95) | ||
// total number of bootstrap resamples, higher is less noisy but slower | ||
.nresamples(100_000) | ||
// total samples to collect within the set measurement time | ||
.sample_size(150); | ||
targets = apply_filter_bench | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
use criterion::criterion_main; | ||
|
||
mod keyvalue; | ||
|
||
criterion_main!(keyvalue::benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters