diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index a73264ac27..ccbb7041e7 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -12,7 +12,7 @@ use once_cell::sync::OnceCell; use relay_common::glob::{glob_match_bytes, GlobOptions}; use relay_dynamic_config::{normalize_json, validate_json, GlobalConfig, ProjectConfig}; use relay_event_normalization::{ - light_normalize_event, GeoIpLookup, LightNormalizationConfig, RawUserAgentInfo, StoreConfig, + GeoIpLookup, NormalizeProcessor, NormalizeProcessorConfig, RawUserAgentInfo, StoreConfig, StoreProcessor, }; use relay_event_schema::processor::{process_value, split_chunks, ProcessingState}; @@ -112,7 +112,7 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( let processor = normalizer as *mut StoreProcessor; let mut event = Annotated::::from_json((*event).as_str())?; let config = (*processor).config(); - let light_normalization_config = LightNormalizationConfig { + let normalization_config = NormalizeProcessorConfig { client_ip: config.client_ip.as_ref(), user_agent: RawUserAgentInfo { user_agent: config.user_agent.as_deref(), @@ -137,7 +137,11 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( enable_trimming: config.enable_trimming.unwrap_or_default(), measurements: None, }; - light_normalize_event(&mut event, light_normalization_config)?; + process_value( + &mut event, + &mut NormalizeProcessor::new(normalization_config), + ProcessingState::root(), + )?; process_value(&mut event, &mut *processor, ProcessingState::root())?; RelayStr::from_string(event.to_json()?) } diff --git a/relay-event-normalization/src/lib.rs b/relay-event-normalization/src/lib.rs index 8c71da0ddd..fccef85af8 100644 --- a/relay-event-normalization/src/lib.rs +++ b/relay-event-normalization/src/lib.rs @@ -150,7 +150,7 @@ pub struct StoreConfig { /// The processor that normalizes events for processing and storage. 
/// -/// This processor is a superset of [`light_normalize_event`], that runs additional and heavier +/// This processor is a superset of [`NormalizeProcessor`], that runs additional and heavier /// normalization steps. These normalizations should ideally be performed on events that are likely /// to be ingested, after other functionality such as inbound filters have run. /// diff --git a/relay-event-normalization/src/normalize/mod.rs b/relay-event-normalization/src/normalize/mod.rs index c4801362d2..4db6d1d884 100644 --- a/relay-event-normalization/src/normalize/mod.rs +++ b/relay-event-normalization/src/normalize/mod.rs @@ -1,16 +1,12 @@ use std::hash::Hash; -use std::ops::Range; use std::sync::Arc; -use chrono::{DateTime, Utc}; use itertools::Itertools; use once_cell::sync::OnceCell; use regex::Regex; use relay_base_schema::metrics::MetricUnit; -use relay_common::time::UnixTimestamp; use relay_event_schema::processor::{ - process_value, MaxChars, ProcessValue, ProcessingAction, ProcessingResult, ProcessingState, - Processor, + MaxChars, ProcessValue, ProcessingAction, ProcessingResult, ProcessingState, Processor, }; use relay_event_schema::protocol::{ Breadcrumb, ClientSdkInfo, Context, Contexts, DebugImage, Event, EventId, EventType, Exception, @@ -23,10 +19,7 @@ use relay_protocol::{ }; use serde::{Deserialize, Serialize}; -use crate::{ - BreakdownsConfig, GeoIpLookup, RawUserAgentInfo, SpanDescriptionRule, StoreConfig, - TransactionNameConfig, -}; +use crate::{GeoIpLookup, StoreConfig}; pub mod breakdowns; pub mod nel; @@ -41,6 +34,8 @@ mod processor; mod request; mod stacktrace; +pub use processor::{NormalizeProcessor, NormalizeProcessorConfig}; + /// Defines a builtin measurement. #[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Hash, Eq)] #[serde(default, rename_all = "camelCase")] @@ -282,131 +277,6 @@ fn normalize_user_geoinfo(geoip_lookup: &GeoIpLookup, user: &mut User) { } } -/// Configuration for [`light_normalize_event`]. 
-#[derive(Clone, Debug)] -pub struct LightNormalizationConfig<'a> { - /// The IP address of the SDK that sent the event. - /// - /// When `{{auto}}` is specified and there is no other IP address in the payload, such as in the - /// `request` context, this IP address gets added to the `user` context. - pub client_ip: Option<&'a IpAddr>, - - /// The user-agent and client hints obtained from the submission request headers. - /// - /// Client hints are the preferred way to infer device, operating system, and browser - /// information should the event payload contain no such data. If no client hints are present, - /// normalization falls back to the user agent. - pub user_agent: RawUserAgentInfo<&'a str>, - - /// The time at which the event was received in this Relay. - /// - /// This timestamp is persisted into the event payload. - pub received_at: Option>, - - /// The maximum amount of seconds an event can be dated in the past. - /// - /// If the event's timestamp is older, the received timestamp is assumed. - pub max_secs_in_past: Option, - - /// The maximum amount of seconds an event can be predated into the future. - /// - /// If the event's timestamp lies further into the future, the received timestamp is assumed. - pub max_secs_in_future: Option, - - /// The valid time range for transaction events. - /// - /// This time range should be inferred from storage dependencies, such as metrics storage. - /// Transactions with an end timestamp outside of this time range are dropped as invalid. - pub transaction_range: Option>, - - /// The maximum length for names of custom measurements. - /// - /// Measurements with longer names are removed from the transaction event and replaced with a - /// metadata entry. - pub max_name_and_unit_len: Option, - - /// Configuration for measurement normalization in transaction events. - /// - /// Has an optional [`MeasurementsConfig`] from both the project and the global level. 
- /// If at least one is provided, then normalization will truncate custom measurements - /// and add units of known built-in measurements. - pub measurements: Option>, - - /// Emit breakdowns based on given configuration. - pub breakdowns_config: Option<&'a BreakdownsConfig>, - - /// When `Some(true)`, context information is extracted from the user agent. - pub normalize_user_agent: Option, - - /// Configuration for sanitizing unparameterized transaction names. - pub transaction_name_config: TransactionNameConfig<'a>, - - /// When `Some(true)`, it is assumed that the event has been normalized before. - /// - /// This disables certain normalizations, especially all that are not idempotent. The - /// renormalize mode is intended for the use in the processing pipeline, so an event modified - /// during ingestion can be validated against the schema and large data can be trimmed. However, - /// advanced normalizations such as inferring contexts or clock drift correction are disabled. - /// - /// `None` equals to `false`. - pub is_renormalize: bool, - - /// When `true`, infers the device class from CPU and model. - pub device_class_synthesis_config: bool, - - /// When `true`, extracts tags from event and spans and materializes them into `span.data`. - pub enrich_spans: bool, - - /// When `true`, computes and materializes attributes in spans based on the given configuration. - pub light_normalize_spans: bool, - - /// The maximum allowed size of tag values in bytes. Longer values will be cropped. - pub max_tag_value_length: usize, // TODO: move span related fields into separate config. - - /// Configuration for replacing identifiers in the span description with placeholders. - /// - /// This is similar to `transaction_name_config`, but applies to span descriptions. 
- pub span_description_rules: Option<&'a Vec>, - - /// Configuration for generating performance score measurements for web vitals - pub performance_score: Option<&'a PerformanceScoreConfig>, - - /// An initialized GeoIP lookup. - pub geoip_lookup: Option<&'a GeoIpLookup>, - - /// When `Some(true)`, individual parts of the event payload is trimmed to a maximum size. - /// - /// See the event schema for size declarations. - pub enable_trimming: bool, -} - -impl Default for LightNormalizationConfig<'_> { - fn default() -> Self { - Self { - client_ip: Default::default(), - user_agent: Default::default(), - received_at: Default::default(), - max_secs_in_past: Default::default(), - max_secs_in_future: Default::default(), - transaction_range: Default::default(), - max_name_and_unit_len: Default::default(), - breakdowns_config: Default::default(), - normalize_user_agent: Default::default(), - transaction_name_config: Default::default(), - is_renormalize: Default::default(), - device_class_synthesis_config: Default::default(), - enrich_spans: Default::default(), - light_normalize_spans: Default::default(), - max_tag_value_length: usize::MAX, - span_description_rules: Default::default(), - performance_score: Default::default(), - geoip_lookup: Default::default(), - enable_trimming: false, - measurements: None, - } - } -} - /// Container for global and project level [`MeasurementsConfig`]. The purpose is to handle /// the merging logic. #[derive(Clone, Debug)] @@ -505,23 +375,6 @@ pub struct PerformanceScoreConfig { pub profiles: Vec, } -/// Normalizes data in the event payload. -/// -/// This function applies a series of transformations on the event payload based -/// on the passed configuration. See the config fields for a description of the -/// normalization steps. There is extended normalization available in the -/// [`StoreProcessor`](crate::StoreProcessor). 
-/// -/// The returned [`ProcessingResult`] indicates whether the passed event should -/// be ingested or dropped. -pub fn light_normalize_event( - event: &mut Annotated, - config: LightNormalizationConfig, -) -> ProcessingResult { - let mut processor = processor::NormalizeProcessor::new(config.into()); - process_value(event, &mut processor, ProcessingState::root()) -} - impl<'a> Processor for StoreNormalizeProcessor<'a> { fn process_event( &mut self, @@ -860,7 +713,7 @@ fn remove_logger_word(tokens: &mut Vec<&str>) { #[cfg(test)] mod tests { - use chrono::TimeZone; + use chrono::{TimeZone, Utc}; use insta::assert_debug_snapshot; use relay_base_schema::metrics::DurationUnit; use relay_base_schema::spans::SpanStatus; @@ -876,6 +729,8 @@ mod tests { use similar_asserts::assert_eq; use uuid::Uuid; + use crate::normalize::processor::NormalizeProcessorConfig; + use super::*; impl Default for StoreNormalizeProcessor<'_> { @@ -1052,8 +907,12 @@ mod tests { let config = StoreConfig::default(); let mut processor = StoreNormalizeProcessor::new(Arc::new(config), None); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let ip_addr = get_value!(event.user.ip_address!); @@ -1080,8 +939,12 @@ mod tests { let config = StoreConfig::default(); let mut processor = StoreNormalizeProcessor::new(Arc::new(config), None); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!(Annotated::empty(), 
event.value().unwrap().user); @@ -1101,11 +964,15 @@ mod tests { }; let mut processor = StoreNormalizeProcessor::new(Arc::new(config), None); - let config = LightNormalizationConfig { - client_ip: Some(&ip_address), - ..Default::default() - }; - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig { + client_ip: Some(&ip_address), + ..Default::default() + }), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let ip_addr = get_value!(event.user.ip_address!); @@ -1130,11 +997,15 @@ mod tests { let geo = GeoIpLookup::open("tests/fixtures/GeoIP2-Enterprise-Test.mmdb").unwrap(); let mut processor = StoreNormalizeProcessor::new(Arc::new(config), Some(&geo)); - let config = LightNormalizationConfig { - client_ip: Some(&ip_address), - ..Default::default() - }; - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig { + client_ip: Some(&ip_address), + ..Default::default() + }), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let user = get_value!(event.user!); @@ -1156,11 +1027,15 @@ mod tests { let geo = GeoIpLookup::open("tests/fixtures/GeoIP2-Enterprise-Test.mmdb").unwrap(); let mut processor = StoreNormalizeProcessor::new(Arc::new(config), Some(&geo)); - let config = LightNormalizationConfig { - client_ip: Some(&ip_address), - ..Default::default() - }; - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig { + client_ip: Some(&ip_address), + ..Default::default() + }), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let user = get_value!(event.user!); @@ -1172,8 +1047,12 @@ 
mod tests { fn test_event_level_defaulted() { let processor = &mut StoreNormalizeProcessor::default(); let mut event = Annotated::new(Event::default()); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, processor, ProcessingState::root()).unwrap(); assert_eq!(get_value!(event.level), Some(&Level::Error)); } @@ -1199,8 +1078,12 @@ mod tests { }, ..Event::default() }); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, processor, ProcessingState::root()).unwrap(); assert_eq!(get_value!(event.level), Some(&Level::Info)); } @@ -1216,8 +1099,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let event = event.value().unwrap(); @@ -1238,8 +1125,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let event = event.value().unwrap(); @@ -1256,8 +1147,12 @@ mod tests { }); let mut processor = 
StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!(get_value!(event.environment), None); } @@ -1273,8 +1168,12 @@ mod tests { ..StoreConfig::default() }; let mut processor = StoreNormalizeProcessor::new(Arc::new(config), None); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let event = event.value().unwrap(); @@ -1297,8 +1196,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let environment = get_path!(event.environment!); @@ -1323,8 +1226,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let release = get_path!(event.release!); @@ -1357,8 +1264,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config 
= LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!(get_value!(event.site), None); @@ -1412,8 +1323,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!(get_value!(event.tags!).len(), 1); @@ -1440,8 +1355,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!( @@ -1492,8 +1411,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); // should keep the first occurrence of every tag @@ -1533,8 +1456,12 @@ mod tests { ..StoreConfig::default() }; let mut processor = StoreNormalizeProcessor::new(Arc::new(config), None); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, 
config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let event = event.value().unwrap(); @@ -1588,8 +1515,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!( @@ -1687,8 +1618,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!( @@ -1720,8 +1655,12 @@ mod tests { let mut event = Annotated::::from_json(json).unwrap(); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); let dist = &event.value().unwrap().dist; @@ -1756,8 +1695,12 @@ mod tests { }); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut 
processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!( @@ -1786,8 +1729,12 @@ mod tests { None, ); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!( @@ -1809,8 +1756,12 @@ mod tests { let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); assert_eq!(get_value!(event.received!), get_value!(event.timestamp!)); @@ -1836,8 +1787,12 @@ mod tests { None, ); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); insta::assert_ron_snapshot!(SerializableAnnotated(&event), { @@ -1880,11 +1835,15 @@ mod tests { } } "#; - let mut event = Annotated::from_json(json).unwrap(); + let mut event = Annotated::::from_json(json).unwrap(); let mut processor = StoreNormalizeProcessor::default(); - let config = LightNormalizationConfig::default(); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), 
+ ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); insta::assert_json_snapshot!(SerializableAnnotated(&event), {".received" => "[received]"}, @r#" @@ -1941,13 +1900,17 @@ mod tests { }), None, ); - let config = LightNormalizationConfig { - received_at, - max_secs_in_past, - max_secs_in_future, - ..Default::default() - }; - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig { + received_at, + max_secs_in_past, + max_secs_in_future, + ..Default::default() + }), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); insta::assert_ron_snapshot!(SerializableAnnotated(&event), { @@ -2000,13 +1963,17 @@ mod tests { }), None, ); - let config = LightNormalizationConfig { - received_at, - max_secs_in_past, - max_secs_in_future, - ..Default::default() - }; - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig { + received_at, + max_secs_in_past, + max_secs_in_future, + ..Default::default() + }), + ProcessingState::root(), + ) + .unwrap(); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); insta::assert_ron_snapshot!(SerializableAnnotated(&event), { @@ -2204,8 +2171,6 @@ mod tests { ..Default::default() }); - let config = LightNormalizationConfig::default(); - fn remove_received_from_event(event: &mut Annotated) -> &mut Annotated { relay_event_schema::processor::apply(event, |e, _m| { e.received = Annotated::empty(); @@ -2215,19 +2180,34 @@ mod tests { event } - light_normalize_event(&mut event, config.clone()).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); let first = remove_received_from_event(&mut event.clone()) .to_json() 
.unwrap(); // Expected some fields (such as timestamps) exist after first light normalization. - light_normalize_event(&mut event, config.clone()).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); let second = remove_received_from_event(&mut event.clone()) .to_json() .unwrap(); assert_eq!(&first, &second, "idempotency check failed"); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ) + .unwrap(); let third = remove_received_from_event(&mut event.clone()) .to_json() .unwrap(); @@ -2288,7 +2268,11 @@ mod tests { .spans .set_value(Some(vec![Annotated::::from_json(span).unwrap()])); - let res = light_normalize_event(&mut modified_event, Default::default()); + let res = process_value( + &mut modified_event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ); assert!(res.is_err(), "{span:?}"); } @@ -2306,12 +2290,13 @@ mod tests { ) .unwrap(); - let result = light_normalize_event( + let result = process_value( &mut event, - LightNormalizationConfig { + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig { is_renormalize: true, ..Default::default() - }, + }), + ProcessingState::root(), ); assert!(result.is_ok()); @@ -2359,16 +2344,20 @@ mod tests { .unwrap(); let lookup = GeoIpLookup::open("tests/fixtures/GeoIP2-Enterprise-Test.mmdb").unwrap(); - let config = LightNormalizationConfig { - geoip_lookup: Some(&lookup), - ..Default::default() - }; // Extract user's geo information before normalization. 
let user_geo = event.value().unwrap().user.value().unwrap().geo.value(); assert!(user_geo.is_none()); - light_normalize_event(&mut event, config).unwrap(); + process_value( + &mut event, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig { + geoip_lookup: Some(&lookup), + ..Default::default() + }), + ProcessingState::root(), + ) + .unwrap(); // Extract user's geo information after normalization. let user_geo = event @@ -2516,10 +2505,13 @@ mod tests { "timestamp": -1 }"#; let mut transaction = Annotated::::from_json(json).unwrap(); + let res = process_value( + &mut transaction, + &mut processor::NormalizeProcessor::new(NormalizeProcessorConfig::default()), + ProcessingState::root(), + ); assert_eq!( - light_normalize_event(&mut transaction, LightNormalizationConfig::default()) - .unwrap_err() - .to_string(), + res.unwrap_err().to_string(), "invalid transaction event: timestamp is too stale" ); } diff --git a/relay-event-normalization/src/normalize/processor.rs b/relay-event-normalization/src/normalize/processor.rs index e023552dac..a27360f64d 100644 --- a/relay-event-normalization/src/normalize/processor.rs +++ b/relay-event-normalization/src/normalize/processor.rs @@ -36,39 +36,130 @@ use crate::{ breakdowns, end_all_spans, normalize_transaction_name, schema, set_default_transaction_source, span, trimming, user_agent, validate_transaction, BreakdownsConfig, ClockDriftProcessor, DynamicMeasurementsConfig, GeoIpLookup, PerformanceScoreConfig, RawUserAgentInfo, - TransactionNameConfig, + SpanDescriptionRule, TransactionNameConfig, }; -use crate::LightNormalizationConfig; - /// Configuration for [`NormalizeProcessor`]. -#[derive(Clone, Debug, Default)] -pub(crate) struct NormalizeProcessorConfig<'a> { - /// Light normalization config. - // XXX(iker): we should split this config appropriately. 
- pub light_config: LightNormalizationConfig<'a>, +#[derive(Clone, Debug)] +pub struct NormalizeProcessorConfig<'a> { + /// The IP address of the SDK that sent the event. + /// + /// When `{{auto}}` is specified and there is no other IP address in the payload, such as in the + /// `request` context, this IP address gets added to the `user` context. + pub client_ip: Option<&'a IpAddr>, - /// Configuration to apply to transaction names, especially around sanitizing. - pub transaction_name_config: TransactionNameConfig<'a>, + /// The user-agent and client hints obtained from the submission request headers. + /// + /// Client hints are the preferred way to infer device, operating system, and browser + /// information should the event payload contain no such data. If no client hints are present, + /// normalization falls back to the user agent. + pub user_agent: RawUserAgentInfo<&'a str>, + + /// The time at which the event was received in this Relay. + /// + /// This timestamp is persisted into the event payload. + pub received_at: Option>, + + /// The maximum amount of seconds an event can be dated in the past. + /// + /// If the event's timestamp is older, the received timestamp is assumed. + pub max_secs_in_past: Option, + + /// The maximum amount of seconds an event can be predated into the future. + /// + /// If the event's timestamp lies further into the future, the received timestamp is assumed. + pub max_secs_in_future: Option, /// Timestamp range in which a transaction must end. /// /// Transactions that finish outside of this range are considered invalid. /// This check is skipped if no range is provided. pub transaction_range: Option>, -} -impl<'a> From> for NormalizeProcessorConfig<'a> { - fn from(mut config: LightNormalizationConfig<'a>) -> Self { - // HACK(iker): workaround to avoid cloning of config items. We'll get - // rid of this when we remove light normalization in the next step. 
- let transaction_name_config = std::mem::take(&mut config.transaction_name_config); - let transaction_range = config.transaction_range.take(); + /// The maximum length for names of custom measurements. + /// + /// Measurements with longer names are removed from the transaction event and replaced with a + /// metadata entry. + pub max_name_and_unit_len: Option, + + /// Configuration for measurement normalization in transaction events. + /// + /// Has an optional [`crate::MeasurementsConfig`] from both the project and the global level. + /// If at least one is provided, then normalization will truncate custom measurements + /// and add units of known built-in measurements. + pub measurements: Option>, + + /// Emit breakdowns based on given configuration. + pub breakdowns_config: Option<&'a BreakdownsConfig>, + + /// When `Some(true)`, context information is extracted from the user agent. + pub normalize_user_agent: Option, + + /// Configuration to apply to transaction names, especially around sanitizing. + pub transaction_name_config: TransactionNameConfig<'a>, + + /// When `true`, it is assumed that the event has been normalized before. + /// + /// This disables certain normalizations, especially all that are not idempotent. The + /// renormalize mode is intended for the use in the processing pipeline, so an event modified + /// during ingestion can be validated against the schema and large data can be trimmed. However, + /// advanced normalizations such as inferring contexts or clock drift correction are disabled. + /// + /// Defaults to `false`. + pub is_renormalize: bool, + + /// When `true`, infers the device class from CPU and model. + pub device_class_synthesis_config: bool, + + /// When `true`, extracts tags from event and spans and materializes them into `span.data`. + pub enrich_spans: bool, + + /// When `true`, computes and materializes attributes in spans based on the given configuration.
+ pub light_normalize_spans: bool, + + /// The maximum allowed size of tag values in bytes. Longer values will be cropped. + pub max_tag_value_length: usize, // TODO: move span related fields into separate config. + + /// Configuration for replacing identifiers in the span description with placeholders. + /// + /// This is similar to `transaction_name_config`, but applies to span descriptions. + pub span_description_rules: Option<&'a Vec>, + + /// Configuration for generating performance score measurements for web vitals. + pub performance_score: Option<&'a PerformanceScoreConfig>, + + /// An initialized GeoIP lookup. + pub geoip_lookup: Option<&'a GeoIpLookup>, + + /// When `true`, individual parts of the event payload are trimmed to a maximum size. + /// + /// See the event schema for size declarations. + pub enable_trimming: bool, +} +impl<'a> Default for NormalizeProcessorConfig<'a> { + fn default() -> Self { Self { - light_config: config, - transaction_name_config, - transaction_range, + client_ip: Default::default(), + user_agent: Default::default(), + received_at: Default::default(), + max_secs_in_past: Default::default(), + max_secs_in_future: Default::default(), + transaction_range: Default::default(), + max_name_and_unit_len: Default::default(), + breakdowns_config: Default::default(), + normalize_user_agent: Default::default(), + transaction_name_config: Default::default(), + is_renormalize: Default::default(), + device_class_synthesis_config: Default::default(), + enrich_spans: Default::default(), + light_normalize_spans: Default::default(), + max_tag_value_length: usize::MAX, + span_description_rules: Default::default(), + performance_score: Default::default(), + geoip_lookup: Default::default(), + enable_trimming: false, + measurements: None, } } } @@ -81,12 +172,13 @@ impl<'a> From> for NormalizeProcessorConfig<'a> { /// The returned [`ProcessingResult`] indicates whether the passed event should /// be ingested or dropped.
#[derive(Debug, Default)] -pub(crate) struct NormalizeProcessor<'a> { +pub struct NormalizeProcessor<'a> { /// Configuration for the normalization steps. config: NormalizeProcessorConfig<'a>, } impl<'a> NormalizeProcessor<'a> { + /// Returns a new [`NormalizeProcessor`] with the given config. pub fn new(config: NormalizeProcessorConfig<'a>) -> Self { Self { config } } @@ -128,8 +220,7 @@ impl<'a> Processor for NormalizeProcessor<'a> { // below this call is being moved (WIP) to the processor appropriately. event.process_child_values(self, state)?; - let config = &self.config.light_config; - if config.is_renormalize { + if self.config.is_renormalize { return Ok(()); } @@ -139,20 +230,20 @@ impl<'a> Processor for NormalizeProcessor<'a> { TimestampProcessor.process_event(event, meta, ProcessingState::root())?; // Process security reports first to ensure all props. - normalize_security_report(event, config.client_ip, &config.user_agent); + normalize_security_report(event, self.config.client_ip, &self.config.user_agent); // Process NEL reports to ensure all props. - normalize_nel_report(event, config.client_ip); + normalize_nel_report(event, self.config.client_ip); // Insert IP addrs before recursing, since geo lookup depends on it. 
normalize_ip_addresses( &mut event.request, &mut event.user, event.platform.as_str(), - config.client_ip, + self.config.client_ip, ); - if let Some(geoip_lookup) = config.geoip_lookup { + if let Some(geoip_lookup) = self.config.geoip_lookup { if let Some(user) = event.user.value_mut() { normalize_user_geoinfo(geoip_lookup, user) } @@ -182,31 +273,31 @@ impl<'a> Processor for NormalizeProcessor<'a> { normalize_timestamps( event, meta, - config.received_at, - config.max_secs_in_past, - config.max_secs_in_future, + self.config.received_at, + self.config.max_secs_in_past, + self.config.max_secs_in_future, )?; // Timestamps are core in the metrics extraction normalize_event_tags(event)?; // Tags are added to every metric // TODO: Consider moving to store normalization - if config.device_class_synthesis_config { + if self.config.device_class_synthesis_config { normalize_device_class(event); } light_normalize_stacktraces(event)?; normalize_exceptions(event)?; // Browser extension filters look at the stacktrace - normalize_user_agent(event, config.normalize_user_agent); // Legacy browsers filter + normalize_user_agent(event, self.config.normalize_user_agent); // Legacy browsers filter normalize_measurements( event, - config.measurements.clone(), - config.max_name_and_unit_len, + self.config.measurements.clone(), + self.config.max_name_and_unit_len, ); // Measurements are part of the metric extraction - normalize_performance_score(event, config.performance_score); - normalize_breakdowns(event, config.breakdowns_config); // Breakdowns are part of the metric extraction too + normalize_performance_score(event, self.config.performance_score); + normalize_breakdowns(event, self.config.breakdowns_config); // Breakdowns are part of the metric extraction too // Some contexts need to be normalized before metrics extraction takes place. 
processor::apply(&mut event.contexts, normalize_contexts)?; - if config.light_normalize_spans && event.ty.value() == Some(&EventType::Transaction) { + if self.config.light_normalize_spans && event.ty.value() == Some(&EventType::Transaction) { // XXX(iker): span normalization runs in the store processor, but // the exclusive time is required for span metrics. Most of // transactions don't have many spans, but if this is no longer the @@ -219,16 +310,16 @@ impl<'a> Processor for NormalizeProcessor<'a> { ); } - if config.enrich_spans { + if self.config.enrich_spans { extract_span_tags( event, &tag_extraction::Config { - max_tag_value_size: config.max_tag_value_length, + max_tag_value_size: self.config.max_tag_value_length, }, ); } - if config.enable_trimming { + if self.config.enable_trimming { // Trim large strings and databags down trimming::TrimmingProcessor::new().process_event( event, @@ -1813,7 +1904,7 @@ mod tests { assert_eq!(get_value!(processed.op!), &"default".to_owned()); let mut reprocess_config = processor_config.clone(); - reprocess_config.light_config.is_renormalize = true; + reprocess_config.is_renormalize = true; let mut processor = NormalizeProcessor::new(processor_config.clone()); let mut reprocessed = processed.clone(); @@ -1850,7 +1941,7 @@ mod tests { assert_eq!(trace_context.op.value().unwrap(), "default"); let mut reprocess_config = processor_config.clone(); - reprocess_config.light_config.is_renormalize = true; + reprocess_config.is_renormalize = true; let mut processor = NormalizeProcessor::new(processor_config.clone()); let mut reprocessed = processed.clone(); diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index 81ae959a85..607ed17c80 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -563,11 +563,13 @@ fn span_op_to_category(op: &str) -> Option<&str> { 
#[cfg(test)] mod tests { + use relay_event_schema::processor::{process_value, ProcessingState}; use relay_event_schema::protocol::{Event, Request}; use relay_protocol::Annotated; + use crate::{NormalizeProcessor, NormalizeProcessorConfig}; + use super::*; - use crate::LightNormalizationConfig; #[test] fn test_truncate_string_no_panic() { @@ -636,13 +638,14 @@ mod tests { } // Normalize first, to make sure that all things are correct as in the real pipeline: - let res = crate::light_normalize_event( + let res = process_value( &mut event, - LightNormalizationConfig { + &mut NormalizeProcessor::new(NormalizeProcessorConfig { enrich_spans: true, light_normalize_spans: true, ..Default::default() - }, + }), + ProcessingState::root(), ); assert!(res.is_ok()); diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index a066494348..8c6a5055a7 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -24,11 +24,11 @@ use relay_dynamic_config::{ }; use relay_event_normalization::replay::{self, ReplayError}; use relay_event_normalization::{ - nel, ClockDriftProcessor, DynamicMeasurementsConfig, LightNormalizationConfig, - MeasurementsConfig, TransactionNameConfig, + nel, ClockDriftProcessor, DynamicMeasurementsConfig, MeasurementsConfig, + NormalizeProcessorConfig, TransactionNameConfig, }; use relay_event_normalization::{GeoIpLookup, RawUserAgentInfo}; -use relay_event_schema::processor::{self, ProcessingAction, ProcessingState}; +use relay_event_schema::processor::{self, process_value, ProcessingAction, ProcessingState}; use relay_event_schema::protocol::{ Breadcrumb, ClientReport, Contexts, Csp, Event, EventType, ExpectCt, ExpectStaple, Hpkp, IpAddr, LenientString, Metrics, NetworkReportError, OtelContext, ProfileContext, RelayInfo, @@ -2676,7 +2676,7 @@ impl EnvelopeProcessorService { .aggregator_config_for(MetricNamespace::Transactions); utils::log_transaction_name_metrics(&mut state.event, |event| { 
- let config = LightNormalizationConfig { + let config = NormalizeProcessorConfig { client_ip: client_ipaddr.as_ref(), user_agent: RawUserAgentInfo { user_agent: request_meta.user_agent(), @@ -2722,8 +2722,12 @@ impl EnvelopeProcessorService { }; metric!(timer(RelayTimers::EventProcessingLightNormalization), { - relay_event_normalization::light_normalize_event(event, config) - .map_err(|_| ProcessingError::InvalidTransaction) + process_value( + event, + &mut relay_event_normalization::NormalizeProcessor::new(config), + ProcessingState::root(), + ) + .map_err(|_| ProcessingError::InvalidTransaction) }) })?; @@ -3127,7 +3131,9 @@ mod tests { use chrono::{DateTime, TimeZone, Utc}; use relay_base_schema::metrics::{DurationUnit, MetricUnit}; use relay_common::glob2::LazyGlob; - use relay_event_normalization::{MeasurementsConfig, RedactionRule, TransactionNameRule}; + use relay_event_normalization::{ + MeasurementsConfig, NormalizeProcessor, RedactionRule, TransactionNameRule, + }; use relay_event_schema::protocol::{EventId, TransactionSource}; use relay_pii::DataScrubbingConfig; use relay_protocol::RuleCondition; @@ -4163,7 +4169,7 @@ mod tests { relay_statsd::with_capturing_test_client(|| { utils::log_transaction_name_metrics(&mut event, |event| { - let config = LightNormalizationConfig { + let config = NormalizeProcessorConfig { transaction_name_config: TransactionNameConfig { rules: &[TransactionNameRule { pattern: LazyGlob::new("/foo/*/**".to_owned()), @@ -4175,7 +4181,11 @@ mod tests { }, ..Default::default() }; - relay_event_normalization::light_normalize_event(event, config) + process_value( + event, + &mut NormalizeProcessor::new(config), + ProcessingState::root(), + ) }) .unwrap(); }) diff --git a/relay-server/src/metrics_extraction/event.rs b/relay-server/src/metrics_extraction/event.rs index 70781a945f..a0fbf0476a 100644 --- a/relay-server/src/metrics_extraction/event.rs +++ b/relay-server/src/metrics_extraction/event.rs @@ -63,7 +63,8 @@ pub fn 
extract_metrics(event: &Event, config: &MetricExtractionConfig) -> Vec