From a39d5874b9d35d47c84510f347d39a0a5061fe47 Mon Sep 17 00:00:00 2001 From: Joris Bayer <jjbayer@gmail.com> Date: Fri, 2 Aug 2024 11:00:03 +0200 Subject: [PATCH] Revert "feat(process-value): Add object level trimming (#3877)" This reverts commit ac0bde335b17c015c82851a8ad9e227e1c4c3e43. --- CHANGELOG.md | 1 - relay-event-derive/src/lib.rs | 68 ++------ relay-event-normalization/src/trimming.rs | 199 +++++++++++++++++----- relay-event-schema/src/processor/attrs.rs | 2 +- relay-event-schema/src/protocol/span.rs | 15 +- 5 files changed, 178 insertions(+), 107 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52d616045fa..94a711a66b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ - Add experimental support for V2 envelope buffering. ([#3855](https://github.com/getsentry/relay/pull/3855), [#3863](https://github.com/getsentry/relay/pull/3863)) - Add `client_sample_rate` to spans, pulled from the trace context. ([#3872](https://github.com/getsentry/relay/pull/3872)) -- Introduce `trim = "disabled"` type attribute to prevent trimming of spans. ([#3877](https://github.com/getsentry/relay/pull/3877)) ## 24.7.1 diff --git a/relay-event-derive/src/lib.rs b/relay-event-derive/src/lib.rs index 8af3fc4af99..814802af2b1 100644 --- a/relay-event-derive/src/lib.rs +++ b/relay-event-derive/src/lib.rs @@ -12,7 +12,6 @@ use proc_macro2::{Span, TokenStream}; use quote::{quote, ToTokens}; -use std::str::FromStr; use syn::{Ident, Lit, Meta, NestedMeta}; use synstructure::decl_derive; @@ -91,8 +90,7 @@ fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream { let mut body = TokenStream::new(); for (index, bi) in variant.bindings().iter().enumerate() { - let mut field_attrs = parse_field_attributes(index, bi.ast(), &mut is_tuple_struct); - field_attrs.trim = Some(matches!(type_attrs.trim, TrimmingMode::Enabled)); + let field_attrs = parse_field_attributes(index, bi.ast(), &mut is_tuple_struct); let ident = &bi.binding; let field_attrs_name = Ident::new(&format!("FIELD_ATTRS_{index}"), Span::call_site()); let field_name = field_attrs.field_name.clone(); @@ -171,14 +169,6 @@ fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream { } }); - // In case we have `trim` set on the type, we want to override this field attribute - // manually. - let field_attrs = FieldAttrs { - trim: Some(matches!(type_attrs.trim, TrimmingMode::Enabled)), - ..Default::default() - }; - let field_attrs_tokens = field_attrs.as_tokens(Some(quote!(parent_attrs))); - s.gen_impl(quote! { #[automatically_derived] gen impl crate::processor::ProcessValue for @Self { @@ -197,13 +187,6 @@ fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream { where P: crate::processor::Processor, { - let parent_attrs = __state.attrs(); - let attrs = #field_attrs_tokens; - - let __state = &__state.enter_nothing( - Some(::std::borrow::Cow::Owned(attrs)) - ); - #process_func_call_tokens; match *self { #process_value_arms @@ -231,32 +214,10 @@ fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream { }) } -#[derive(Default, Copy, Clone)] -enum TrimmingMode { - #[default] - Enabled, - Disabled, -} - -impl FromStr for TrimmingMode { - type Err = (); - - fn from_str(s: &str) -> Result<Self, Self::Err> { - let trimming_mode = match s { - "enabled" => TrimmingMode::Enabled, - "disabled" => TrimmingMode::Disabled, - _ => return Err(()), - }; - - Ok(trimming_mode) - } -} - #[derive(Default)] struct TypeAttrs { process_func: Option<String>, value_type: Vec<String>, - trim: TrimmingMode, } impl TypeAttrs { @@ -315,19 +276,7 @@ fn parse_type_attributes(s: &synstructure::Structure<'_>) -> TypeAttrs { rv.value_type.push(litstr.value()); } _ => { - panic!("Got non string literal for value_type"); - } - } - } else if ident == "trim" { - match name_value.lit { - Lit::Str(litstr) => { - rv.trim = litstr - .value() - .parse() - .expect("Got invalid value for trim") - } - _ => { - panic!("Got invalid value for trim"); + panic!("Got non string literal for value type"); } } } @@ -481,7 +430,7 @@ impl FieldAttrs { max_bytes: #max_bytes, pii: #pii, retain: #retain, - trim: #trim + trim: #trim, } }) } @@ -656,6 +605,17 @@ fn parse_field_attributes( panic!("Got non string literal for retain"); } } + } else if ident == "trim" { + match name_value.lit { + Lit::Str(litstr) => match litstr.value().as_str() { + "true" => rv.trim = None, + "false" => rv.trim = Some(false), + other => panic!("Unknown value {other}"), + }, + _ => { + panic!("Got non string literal for trim"); + } + } } else if ident == "legacy_alias" || ident == "skip_serialization" { // Skip } else { diff --git a/relay-event-normalization/src/trimming.rs b/relay-event-normalization/src/trimming.rs index 6b823dd0b0f..56b44f98efe 100644 --- a/relay-event-normalization/src/trimming.rs +++ b/relay-event-normalization/src/trimming.rs @@ -54,10 +54,6 @@ impl TrimmingProcessor { .filter_map(|x| x.size_remaining) .min() } - - fn should_trim(&self, state: &ProcessingState<'_>) -> bool { - state.attrs().trim - } } impl Processor for TrimmingProcessor { @@ -78,7 +74,7 @@ impl Processor for TrimmingProcessor { }); } - if self.should_trim(state) { + if state.attrs().trim { if self.remaining_size() == Some(0) { // TODO: Create remarks (ensure they do not bloat event) return Err(ProcessingAction::DeleteValueHard); @@ -88,7 +84,6 @@ impl Processor for TrimmingProcessor { return Err(ProcessingAction::DeleteValueHard); } } - Ok(()) } @@ -137,11 +132,13 @@ impl Processor for TrimmingProcessor { trim_string(value, meta, max_chars, state.attrs().max_chars_allowance); } - if self.should_trim(state) { - if let Some(size_state) = self.size_state.last() { - if let Some(size_remaining) = size_state.size_remaining { - trim_string(value, meta, size_remaining, 0); - } + if !state.attrs().trim { + return Ok(()); + } + + if let Some(size_state) = self.size_state.last() { + if let Some(size_remaining) = size_state.size_remaining { + trim_string(value, meta, size_remaining, 0); } } @@ -157,6 +154,10 @@ impl Processor for TrimmingProcessor { where T: ProcessValue, { + if !state.attrs().trim { + return Ok(()); + } + // If we need to check the bag size, then we go down a different path if !self.size_state.is_empty() { let original_length = value.len(); @@ -176,14 +177,12 @@ impl Processor for TrimmingProcessor { processor::process_value(item, self, &item_state)?; } - if self.should_trim(state) { - if let Some(split_index) = split_index { - let _ = value.split_off(split_index); - } + if let Some(split_index) = split_index { + let _ = value.split_off(split_index); + } - if value.len() != original_length { - meta.set_original_length(Some(original_length)); - } + if value.len() != original_length { + meta.set_original_length(Some(original_length)); } } else { value.process_child_values(self, state)?; @@ -201,6 +200,10 @@ impl Processor for TrimmingProcessor { where T: ProcessValue, { + if !state.attrs().trim { + return Ok(()); + } + // If we need to check the bag size, then we go down a different path if !self.size_state.is_empty() { let original_length = value.len(); @@ -220,14 +223,12 @@ impl Processor for TrimmingProcessor { processor::process_value(item, self, &item_state)?; } - if self.should_trim(state) { - if let Some(split_key) = split_key { - let _ = value.split_off(&split_key); - } + if let Some(split_key) = split_key { + let _ = value.split_off(&split_key); + } - if value.len() != original_length { - meta.set_original_length(Some(original_length)); - } + if value.len() != original_length { + meta.set_original_length(Some(original_length)); } } else { value.process_child_values(self, state)?; @@ -242,6 +243,10 @@ impl Processor for TrimmingProcessor { _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { + if !state.attrs().trim { + return Ok(()); + } + match value { Value::Array(_) | Value::Object(_) => { if self.remaining_depth(state) == Some(1) { @@ -273,6 +278,10 @@ impl Processor for TrimmingProcessor { _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { + if !state.attrs().trim { + return Ok(()); + } + processor::apply(&mut stacktrace.frames, |frames, meta| { enforce_frame_hard_limit(frames, meta, 250); Ok(()) @@ -413,8 +422,10 @@ fn slim_frame_data(frames: &mut Array<Frame>, frame_allowance: usize) { mod tests { use std::iter::repeat; + use chrono::DateTime; use relay_event_schema::protocol::{ - Breadcrumb, Context, Contexts, Event, Exception, ExtraValue, Span, TagEntry, Tags, Values, + Breadcrumb, Context, Contexts, Event, Exception, ExtraValue, Span, SpanId, TagEntry, Tags, + Timestamp, TraceId, Values, }; use relay_protocol::{get_value, Map, Remark, SerializableAnnotated}; use similar_asserts::assert_eq; @@ -933,10 +944,8 @@ mod tests { #[test] fn test_too_many_spans_trimmed() { - let long_platform = "a".repeat(1024 * 100); - let span = Span { - platform: Annotated::new(long_platform.clone()), + platform: Annotated::new("a".repeat(1024 * 100)), ..Default::default() }; let spans = std::iter::repeat_with(|| Annotated::new(span.clone())) @@ -951,24 +960,124 @@ mod tests { let mut processor = TrimmingProcessor::new(); processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); - // We check that all the spans that were kept have no trimmed fields. - let trimmed_spans = get_value!(event.spans).unwrap(); - assert_eq!(trimmed_spans.len(), 8); - for trimmed_span in trimmed_spans { - let platform = trimmed_span.value().unwrap().platform.value().unwrap(); - assert_eq!(long_platform, *platform); - } + assert_eq!(event.0.unwrap().spans.0.unwrap().len(), 8); + } + + #[test] + fn test_untrimmable_fields() { + let original_description = "a".repeat(819163); + let original_trace_id = TraceId("b".repeat(48)); + let mut event = Annotated::new(Event { + spans: Annotated::new(vec![ + Span { + description: original_description.clone().into(), + ..Default::default() + } + .into(), + Span { + trace_id: original_trace_id.clone().into(), + ..Default::default() + } + .into(), + ]), + ..Default::default() + }); + + let mut processor = TrimmingProcessor::new(); + processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); + + assert_eq!( + get_value!(event.spans[0].description!), + &original_description + ); + // Trace ID would be trimmed without `trim = "false"` + assert_eq!(get_value!(event.spans[1].trace_id!), &original_trace_id); + } + + #[test] + fn test_untrimmable_fields_drop() { + let original_description = "a".repeat(819164); + let original_span_id = SpanId("b".repeat(48)); + let original_trace_id = TraceId("c".repeat(48)); + let original_segment_id = SpanId("d".repeat(48)); + let original_op = "e".repeat(129); + + let mut event = Annotated::new(Event { + spans: Annotated::new(vec![ + Span { + description: original_description.clone().into(), + ..Default::default() + } + .into(), + Span { + span_id: original_span_id.clone().into(), + trace_id: original_trace_id.clone().into(), + segment_id: original_segment_id.clone().into(), + is_segment: false.into(), + op: original_op.clone().into(), + start_timestamp: Timestamp( + DateTime::parse_from_rfc3339("1996-12-19T16:39:57Z") + .unwrap() + .into(), + ) + .into(), + timestamp: Timestamp( + DateTime::parse_from_rfc3339("1996-12-19T16:39:58Z") + .unwrap() + .into(), + ) + .into(), + ..Default::default() + } + .into(), + ]), + ..Default::default() + }); + + let mut processor = TrimmingProcessor::new(); + processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); - // We check that the meta on spans is preserved signaling that we originally had 10 - // elements. assert_eq!( - get_value!(event) - .unwrap() - .spans - .meta() - .original_length() - .unwrap(), - 10 + get_value!(event.spans[0].description!), + &original_description ); + // These fields would be dropped without `trim = "false"` + assert_eq!(get_value!(event.spans[1].span_id!), &original_span_id); + assert_eq!(get_value!(event.spans[1].trace_id!), &original_trace_id); + assert_eq!(get_value!(event.spans[1].segment_id!), &original_segment_id); + assert_eq!(get_value!(event.spans[1].is_segment!), &false); + // span.op is trimmed to its max_chars, but not dropped: + assert_eq!(get_value!(event.spans[1].op!).len(), 128); + assert!(get_value!(event.spans[1].start_timestamp).is_some()); + assert!(get_value!(event.spans[1].timestamp).is_some()); + } + + #[test] + fn test_trim_false_contributes_to_budget() { + for span_id in ["short", "looooooooooooooooooooooooooong"] { + let original_span_id = SpanId(span_id.to_owned()); + let original_description = "a".repeat(900000); + + let mut event = Annotated::new(Event { + spans: Annotated::new(vec![Span { + span_id: original_span_id.clone().into(), + description: original_description.clone().into(), + ..Default::default() + } + .into()]), + ..Default::default() + }); + + let mut processor = TrimmingProcessor::new(); + processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); + + assert_eq!(get_value!(event.spans[0].span_id!).as_ref(), span_id); + + // The amount of trimming on the description depends on the length of the span id. + assert_eq!( + get_value!(event.spans[0].description!).len(), + 1024 * 800 - 12 - span_id.len(), + ); + } } } diff --git a/relay-event-schema/src/processor/attrs.rs b/relay-event-schema/src/processor/attrs.rs index bbd417a939f..c80a9fe7c63 100644 --- a/relay-event-schema/src/processor/attrs.rs +++ b/relay-event-schema/src/processor/attrs.rs @@ -128,7 +128,7 @@ pub struct FieldAttrs { pub pii: Pii, /// Whether additional properties should be retained during normalization. pub retain: bool, - /// `true` if the field is allowed to be trimmed. + /// Whether the trimming processor is allowed to shorten or drop this field. pub trim: bool, } diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs index 032b9224294..33410bba2be 100644 --- a/relay-event-schema/src/protocol/span.rs +++ b/relay-event-schema/src/protocol/span.rs @@ -12,14 +12,14 @@ use crate::protocol::{ #[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)] #[cfg_attr(feature = "jsonschema", derive(JsonSchema))] -#[metastructure(process_func = "process_span", value_type = "Span", trim = "disabled")] +#[metastructure(process_func = "process_span", value_type = "Span")] pub struct Span { /// Timestamp when the span was ended. - #[metastructure(required = "true")] + #[metastructure(required = "true", trim = "false")] pub timestamp: Annotated<Timestamp>, /// Timestamp when the span started. - #[metastructure(required = "true")] + #[metastructure(required = "true", trim = "false")] pub start_timestamp: Annotated<Timestamp>, /// The amount of time in milliseconds spent in this span, @@ -27,27 +27,30 @@ pub struct Span { pub exclusive_time: Annotated<f64>, /// Span type (see `OperationType` docs). - #[metastructure(max_chars = 128)] + #[metastructure(max_chars = 128, trim = "false")] pub op: Annotated<OperationType>, /// The Span id. - #[metastructure(required = "true")] + #[metastructure(required = "true", trim = "false")] pub span_id: Annotated<SpanId>, /// The ID of the span enclosing this span. + #[metastructure(trim = "false")] pub parent_span_id: Annotated<SpanId>, /// The ID of the trace the span belongs to. - #[metastructure(required = "true")] + #[metastructure(required = "true", trim = "false")] pub trace_id: Annotated<TraceId>, /// A unique identifier for a segment within a trace (8 byte hexadecimal string). /// /// For spans embedded in transactions, the `segment_id` is the `span_id` of the containing /// transaction. + #[metastructure(trim = "false")] pub segment_id: Annotated<SpanId>, /// Whether or not the current span is the root of the segment. + #[metastructure(trim = "false")] pub is_segment: Annotated<bool>, /// The status of a span.