Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(perf): Improve performance of event processing by avoiding regex clone #767

Merged
merged 3 commits into from
Sep 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- Add the client SDK to session kafka payloads. ([#751](https://github.com/getsentry/relay/pull/751))
- Add a standalone tool to document metrics in JSON or YAML. ([#752](https://github.com/getsentry/relay/pull/752))
- Emit `processing.event.produced` for user report and session Kafka messages. ([#757](https://github.com/getsentry/relay/pull/757))
- Improve performance of event processing by avoiding regex clone. ([#767](https://github.com/getsentry/relay/pull/767))

## 20.8.0

Expand Down
7 changes: 2 additions & 5 deletions relay-general/derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -728,12 +728,9 @@ impl FieldAttrs {
};

let match_regex = if let Some(ref match_regex) = self.match_regex {
quote!(Some(
#[allow(clippy::trivial_regex)]
::regex::Regex::new(#match_regex).unwrap()
))
quote!(Some(#match_regex))
} else if let Some(ref parent_attrs) = inherit_from_field_attrs {
quote!(#parent_attrs.match_regex.clone())
quote!(#parent_attrs.match_regex)
} else {
quote!(None)
};
Expand Down
11 changes: 4 additions & 7 deletions relay-general/derive/src/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,7 @@ pub fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream {
let field_attrs_tokens = field_attrs.as_tokens(None);

(quote! {
::lazy_static::lazy_static! {
static ref #field_attrs_name: crate::processor::FieldAttrs =
#field_attrs_tokens;
}
static #field_attrs_name: crate::processor::FieldAttrs = #field_attrs_tokens;
})
.to_tokens(&mut body);

Expand All @@ -94,21 +91,21 @@ pub fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream {
}

quote! {
__state.enter_nothing(Some(::std::borrow::Cow::Borrowed(&*#field_attrs_name)))
__state.enter_nothing(Some(::std::borrow::Cow::Borrowed(&#field_attrs_name)))
}
} else if is_tuple_struct {
quote! {
__state.enter_index(
#index,
Some(::std::borrow::Cow::Borrowed(&*#field_attrs_name)),
Some(::std::borrow::Cow::Borrowed(&#field_attrs_name)),
crate::processor::ValueType::for_field(#ident),
)
}
} else {
quote! {
__state.enter_static(
#field_name,
Some(::std::borrow::Cow::Borrowed(&*#field_attrs_name)),
Some(::std::borrow::Cow::Borrowed(&#field_attrs_name)),
crate::processor::ValueType::for_field(#ident),
)
}
Expand Down
5 changes: 2 additions & 3 deletions relay-general/src/processor/attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use std::fmt;
use std::str::FromStr;

use failure::Fail;
use regex::Regex;
use smallvec::SmallVec;

use crate::processor::{ProcessValue, SelectorPathItem, SelectorSpec};
Expand Down Expand Up @@ -232,7 +231,7 @@ pub enum Pii {
}

/// Meta information about a field.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Copy)]
pub struct FieldAttrs {
/// Optionally the name of the field.
pub name: Option<&'static str>,
Expand All @@ -243,7 +242,7 @@ pub struct FieldAttrs {
/// Whether to trim whitespace from this string.
pub trim_whitespace: bool,
/// A regex to validate the (string) value against.
pub match_regex: Option<Regex>,
pub match_regex: Option<&'static str>,
/// The maximum char length of this field.
pub max_chars: Option<MaxChars>,
/// The maximum bag size of this field.
Expand Down
34 changes: 32 additions & 2 deletions relay-general/src/store/schema.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,40 @@
use regex::Regex;

use crate::processor::{ProcessValue, ProcessingState, Processor};
use crate::types::{
Array, Empty, Error, ErrorKind, Meta, Object, ProcessingAction, ProcessingResult,
};

/// Stateless processor type; its behavior is defined by the `Processor` implementation in this
/// module.
pub struct SchemaProcessor;

/// Generates `get_regex`, a lookup from a regex pattern string to its compiled `Regex`.
///
/// Each `IDENT: pattern` entry becomes one `lazy_static` static holding the compiled pattern,
/// plus one match arm that maps the pattern string back to that static. The identifiers only
/// name the generated statics and therefore have to be unique within one invocation.
macro_rules! declare_used_field_regexes {
    ($($static_name:ident: $pattern:expr),* $(,)*) => {
        /// Returns the compiled regex for `name`, where `name` is the pattern source itself.
        ///
        /// # Panics
        ///
        /// Panics if `name` is not one of the patterns passed to
        /// `declare_used_field_regexes`, or if a declared pattern fails to compile.
        fn get_regex(name: &'static str) -> &'static Regex {
            // One static per declared pattern; `lazy_static` defers `Regex::new` until the
            // static is first dereferenced.
            lazy_static::lazy_static! {
                $(
                    static ref $static_name: Regex = Regex::new($pattern).unwrap();
                )*
            }

            // The pattern literal doubles as the lookup key.
            match name {
                $(
                    $pattern => &*$static_name,
                )*
                _ => panic!("Please declare Regex {} using declare_used_field_regexes.", name),
            }
        }
    }
}

// List of the regexes that `get_regex` can resolve. Each one is compiled once via `lazy_static`
// and then reused, instead of being rebuilt or cloned, for max performance. The identifier in
// front is arbitrary, but needs to be unique.
//
// Every pattern that is passed to `get_regex` (i.e. every `match_regex` field attribute value)
// must be declared here verbatim; `get_regex` panics on a pattern that is missing from this list.
declare_used_field_regexes![
A: r"^[^\r\n\f\t/]*\z",
B: r"^[^\r\n\x0C/]+$",
C: r"^[^\r\n]*\z",
D: r"^[a-zA-Z0-9_\.:-]+\z",
E: r"^\s*[a-zA-Z0-9_.-]*\s*$",
F: r"^[^\n]+\z",
];

impl Processor for SchemaProcessor {
fn process_string(
&mut self,
Expand Down Expand Up @@ -95,8 +125,8 @@ fn verify_value_pattern(
meta: &mut Meta,
state: &ProcessingState<'_>,
) -> ProcessingResult {
if let Some(ref regex) = state.attrs().match_regex {
if !regex.is_match(value) {
if let Some(ref regex_string) = state.attrs().match_regex {
if !get_regex(regex_string).is_match(value) {
meta.add_error(Error::invalid("invalid characters in string"));
return Err(ProcessingAction::DeleteValueSoft);
}
Expand Down