From a39d5874b9d35d47c84510f347d39a0a5061fe47 Mon Sep 17 00:00:00 2001
From: Joris Bayer <jjbayer@gmail.com>
Date: Fri, 2 Aug 2024 11:00:03 +0200
Subject: [PATCH] Revert "feat(process-value): Add object level trimming
 (#3877)"

This reverts commit ac0bde335b17c015c82851a8ad9e227e1c4c3e43.
---
 CHANGELOG.md                              |   1 -
 relay-event-derive/src/lib.rs             |  68 ++------
 relay-event-normalization/src/trimming.rs | 199 +++++++++++++++++-----
 relay-event-schema/src/processor/attrs.rs |   2 +-
 relay-event-schema/src/protocol/span.rs   |  15 +-
 5 files changed, 178 insertions(+), 107 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52d616045fa..94a711a66b1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,7 +10,6 @@
 
 - Add experimental support for V2 envelope buffering. ([#3855](https://github.com/getsentry/relay/pull/3855), [#3863](https://github.com/getsentry/relay/pull/3863))
 - Add `client_sample_rate` to spans, pulled from the trace context. ([#3872](https://github.com/getsentry/relay/pull/3872))
-- Introduce `trim = "disabled"` type attribute to prevent trimming of spans. ([#3877](https://github.com/getsentry/relay/pull/3877))
 
 ## 24.7.1
 
diff --git a/relay-event-derive/src/lib.rs b/relay-event-derive/src/lib.rs
index 8af3fc4af99..814802af2b1 100644
--- a/relay-event-derive/src/lib.rs
+++ b/relay-event-derive/src/lib.rs
@@ -12,7 +12,6 @@
 
 use proc_macro2::{Span, TokenStream};
 use quote::{quote, ToTokens};
-use std::str::FromStr;
 use syn::{Ident, Lit, Meta, NestedMeta};
 use synstructure::decl_derive;
 
@@ -91,8 +90,7 @@ fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream {
 
         let mut body = TokenStream::new();
         for (index, bi) in variant.bindings().iter().enumerate() {
-            let mut field_attrs = parse_field_attributes(index, bi.ast(), &mut is_tuple_struct);
-            field_attrs.trim = Some(matches!(type_attrs.trim, TrimmingMode::Enabled));
+            let field_attrs = parse_field_attributes(index, bi.ast(), &mut is_tuple_struct);
             let ident = &bi.binding;
             let field_attrs_name = Ident::new(&format!("FIELD_ATTRS_{index}"), Span::call_site());
             let field_name = field_attrs.field_name.clone();
@@ -171,14 +169,6 @@ fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream {
         }
     });
 
-    // In case we have `trim` set on the type, we want to override this field attribute
-    // manually.
-    let field_attrs = FieldAttrs {
-        trim: Some(matches!(type_attrs.trim, TrimmingMode::Enabled)),
-        ..Default::default()
-    };
-    let field_attrs_tokens = field_attrs.as_tokens(Some(quote!(parent_attrs)));
-
     s.gen_impl(quote! {
         #[automatically_derived]
         gen impl crate::processor::ProcessValue for @Self {
@@ -197,13 +187,6 @@ fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream {
             where
                 P: crate::processor::Processor,
             {
-                let parent_attrs = __state.attrs();
-                let attrs = #field_attrs_tokens;
-
-                let __state = &__state.enter_nothing(
-                    Some(::std::borrow::Cow::Owned(attrs))
-                );
-
                 #process_func_call_tokens;
                 match *self {
                     #process_value_arms
@@ -231,32 +214,10 @@ fn derive_process_value(mut s: synstructure::Structure<'_>) -> TokenStream {
     })
 }
 
-#[derive(Default, Copy, Clone)]
-enum TrimmingMode {
-    #[default]
-    Enabled,
-    Disabled,
-}
-
-impl FromStr for TrimmingMode {
-    type Err = ();
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let trimming_mode = match s {
-            "enabled" => TrimmingMode::Enabled,
-            "disabled" => TrimmingMode::Disabled,
-            _ => return Err(()),
-        };
-
-        Ok(trimming_mode)
-    }
-}
-
 #[derive(Default)]
 struct TypeAttrs {
     process_func: Option<String>,
     value_type: Vec<String>,
-    trim: TrimmingMode,
 }
 
 impl TypeAttrs {
@@ -315,19 +276,7 @@ fn parse_type_attributes(s: &synstructure::Structure<'_>) -> TypeAttrs {
                                         rv.value_type.push(litstr.value());
                                     }
                                     _ => {
-                                        panic!("Got non string literal for value_type");
-                                    }
-                                }
-                            } else if ident == "trim" {
-                                match name_value.lit {
-                                    Lit::Str(litstr) => {
-                                        rv.trim = litstr
-                                            .value()
-                                            .parse()
-                                            .expect("Got invalid value for trim")
-                                    }
-                                    _ => {
-                                        panic!("Got invalid value for trim");
+                                        panic!("Got non string literal for value type");
                                     }
                                 }
                             }
@@ -481,7 +430,7 @@ impl FieldAttrs {
                 max_bytes: #max_bytes,
                 pii: #pii,
                 retain: #retain,
-                trim: #trim
+                trim: #trim,
             }
         })
     }
@@ -656,6 +605,17 @@ fn parse_field_attributes(
                                         panic!("Got non string literal for retain");
                                     }
                                 }
+                            } else if ident == "trim" {
+                                match name_value.lit {
+                                    Lit::Str(litstr) => match litstr.value().as_str() {
+                                        "true" => rv.trim = None,
+                                        "false" => rv.trim = Some(false),
+                                        other => panic!("Unknown value {other}"),
+                                    },
+                                    _ => {
+                                        panic!("Got non string literal for trim");
+                                    }
+                                }
                             } else if ident == "legacy_alias" || ident == "skip_serialization" {
                                 // Skip
                             } else {
diff --git a/relay-event-normalization/src/trimming.rs b/relay-event-normalization/src/trimming.rs
index 6b823dd0b0f..56b44f98efe 100644
--- a/relay-event-normalization/src/trimming.rs
+++ b/relay-event-normalization/src/trimming.rs
@@ -54,10 +54,6 @@ impl TrimmingProcessor {
             .filter_map(|x| x.size_remaining)
             .min()
     }
-
-    fn should_trim(&self, state: &ProcessingState<'_>) -> bool {
-        state.attrs().trim
-    }
 }
 
 impl Processor for TrimmingProcessor {
@@ -78,7 +74,7 @@ impl Processor for TrimmingProcessor {
             });
         }
 
-        if self.should_trim(state) {
+        if state.attrs().trim {
             if self.remaining_size() == Some(0) {
                 // TODO: Create remarks (ensure they do not bloat event)
                 return Err(ProcessingAction::DeleteValueHard);
@@ -88,7 +84,6 @@ impl Processor for TrimmingProcessor {
                 return Err(ProcessingAction::DeleteValueHard);
             }
         }
-
         Ok(())
     }
 
@@ -137,11 +132,13 @@ impl Processor for TrimmingProcessor {
             trim_string(value, meta, max_chars, state.attrs().max_chars_allowance);
         }
 
-        if self.should_trim(state) {
-            if let Some(size_state) = self.size_state.last() {
-                if let Some(size_remaining) = size_state.size_remaining {
-                    trim_string(value, meta, size_remaining, 0);
-                }
+        if !state.attrs().trim {
+            return Ok(());
+        }
+
+        if let Some(size_state) = self.size_state.last() {
+            if let Some(size_remaining) = size_state.size_remaining {
+                trim_string(value, meta, size_remaining, 0);
             }
         }
 
@@ -157,6 +154,10 @@ impl Processor for TrimmingProcessor {
     where
         T: ProcessValue,
     {
+        if !state.attrs().trim {
+            return Ok(());
+        }
+
         // If we need to check the bag size, then we go down a different path
         if !self.size_state.is_empty() {
             let original_length = value.len();
@@ -176,14 +177,12 @@ impl Processor for TrimmingProcessor {
                 processor::process_value(item, self, &item_state)?;
             }
 
-            if self.should_trim(state) {
-                if let Some(split_index) = split_index {
-                    let _ = value.split_off(split_index);
-                }
+            if let Some(split_index) = split_index {
+                let _ = value.split_off(split_index);
+            }
 
-                if value.len() != original_length {
-                    meta.set_original_length(Some(original_length));
-                }
+            if value.len() != original_length {
+                meta.set_original_length(Some(original_length));
             }
         } else {
             value.process_child_values(self, state)?;
@@ -201,6 +200,10 @@ impl Processor for TrimmingProcessor {
     where
         T: ProcessValue,
     {
+        if !state.attrs().trim {
+            return Ok(());
+        }
+
         // If we need to check the bag size, then we go down a different path
         if !self.size_state.is_empty() {
             let original_length = value.len();
@@ -220,14 +223,12 @@ impl Processor for TrimmingProcessor {
                 processor::process_value(item, self, &item_state)?;
             }
 
-            if self.should_trim(state) {
-                if let Some(split_key) = split_key {
-                    let _ = value.split_off(&split_key);
-                }
+            if let Some(split_key) = split_key {
+                let _ = value.split_off(&split_key);
+            }
 
-                if value.len() != original_length {
-                    meta.set_original_length(Some(original_length));
-                }
+            if value.len() != original_length {
+                meta.set_original_length(Some(original_length));
             }
         } else {
             value.process_child_values(self, state)?;
@@ -242,6 +243,10 @@ impl Processor for TrimmingProcessor {
         _meta: &mut Meta,
         state: &ProcessingState<'_>,
     ) -> ProcessingResult {
+        if !state.attrs().trim {
+            return Ok(());
+        }
+
         match value {
             Value::Array(_) | Value::Object(_) => {
                 if self.remaining_depth(state) == Some(1) {
@@ -273,6 +278,10 @@ impl Processor for TrimmingProcessor {
         _meta: &mut Meta,
         state: &ProcessingState<'_>,
     ) -> ProcessingResult {
+        if !state.attrs().trim {
+            return Ok(());
+        }
+
         processor::apply(&mut stacktrace.frames, |frames, meta| {
             enforce_frame_hard_limit(frames, meta, 250);
             Ok(())
@@ -413,8 +422,10 @@ fn slim_frame_data(frames: &mut Array<Frame>, frame_allowance: usize) {
 mod tests {
     use std::iter::repeat;
 
+    use chrono::DateTime;
     use relay_event_schema::protocol::{
-        Breadcrumb, Context, Contexts, Event, Exception, ExtraValue, Span, TagEntry, Tags, Values,
+        Breadcrumb, Context, Contexts, Event, Exception, ExtraValue, Span, SpanId, TagEntry, Tags,
+        Timestamp, TraceId, Values,
     };
     use relay_protocol::{get_value, Map, Remark, SerializableAnnotated};
     use similar_asserts::assert_eq;
@@ -933,10 +944,8 @@ mod tests {
 
     #[test]
     fn test_too_many_spans_trimmed() {
-        let long_platform = "a".repeat(1024 * 100);
-
         let span = Span {
-            platform: Annotated::new(long_platform.clone()),
+            platform: Annotated::new("a".repeat(1024 * 100)),
             ..Default::default()
         };
         let spans = std::iter::repeat_with(|| Annotated::new(span.clone()))
@@ -951,24 +960,124 @@ mod tests {
         let mut processor = TrimmingProcessor::new();
         processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
 
-        // We check that all the spans that were kept have no trimmed fields.
-        let trimmed_spans = get_value!(event.spans).unwrap();
-        assert_eq!(trimmed_spans.len(), 8);
-        for trimmed_span in trimmed_spans {
-            let platform = trimmed_span.value().unwrap().platform.value().unwrap();
-            assert_eq!(long_platform, *platform);
-        }
+        assert_eq!(event.0.unwrap().spans.0.unwrap().len(), 8);
+    }
+
+    #[test]
+    fn test_untrimmable_fields() {
+        let original_description = "a".repeat(819163);
+        let original_trace_id = TraceId("b".repeat(48));
+        let mut event = Annotated::new(Event {
+            spans: Annotated::new(vec![
+                Span {
+                    description: original_description.clone().into(),
+                    ..Default::default()
+                }
+                .into(),
+                Span {
+                    trace_id: original_trace_id.clone().into(),
+                    ..Default::default()
+                }
+                .into(),
+            ]),
+            ..Default::default()
+        });
+
+        let mut processor = TrimmingProcessor::new();
+        processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
+
+        assert_eq!(
+            get_value!(event.spans[0].description!),
+            &original_description
+        );
+        // Trace ID would be trimmed without `trim = "false"`
+        assert_eq!(get_value!(event.spans[1].trace_id!), &original_trace_id);
+    }
+
+    #[test]
+    fn test_untrimmable_fields_drop() {
+        let original_description = "a".repeat(819164);
+        let original_span_id = SpanId("b".repeat(48));
+        let original_trace_id = TraceId("c".repeat(48));
+        let original_segment_id = SpanId("d".repeat(48));
+        let original_op = "e".repeat(129);
+
+        let mut event = Annotated::new(Event {
+            spans: Annotated::new(vec![
+                Span {
+                    description: original_description.clone().into(),
+                    ..Default::default()
+                }
+                .into(),
+                Span {
+                    span_id: original_span_id.clone().into(),
+                    trace_id: original_trace_id.clone().into(),
+                    segment_id: original_segment_id.clone().into(),
+                    is_segment: false.into(),
+                    op: original_op.clone().into(),
+                    start_timestamp: Timestamp(
+                        DateTime::parse_from_rfc3339("1996-12-19T16:39:57Z")
+                            .unwrap()
+                            .into(),
+                    )
+                    .into(),
+                    timestamp: Timestamp(
+                        DateTime::parse_from_rfc3339("1996-12-19T16:39:58Z")
+                            .unwrap()
+                            .into(),
+                    )
+                    .into(),
+                    ..Default::default()
+                }
+                .into(),
+            ]),
+            ..Default::default()
+        });
+
+        let mut processor = TrimmingProcessor::new();
+        processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
 
-        // We check that the meta on spans is preserved signaling that we originally had 10
-        // elements.
         assert_eq!(
-            get_value!(event)
-                .unwrap()
-                .spans
-                .meta()
-                .original_length()
-                .unwrap(),
-            10
+            get_value!(event.spans[0].description!),
+            &original_description
         );
+        // These fields would be dropped without `trim = "false"`
+        assert_eq!(get_value!(event.spans[1].span_id!), &original_span_id);
+        assert_eq!(get_value!(event.spans[1].trace_id!), &original_trace_id);
+        assert_eq!(get_value!(event.spans[1].segment_id!), &original_segment_id);
+        assert_eq!(get_value!(event.spans[1].is_segment!), &false);
+        // span.op is trimmed to its max_chars, but not dropped:
+        assert_eq!(get_value!(event.spans[1].op!).len(), 128);
+        assert!(get_value!(event.spans[1].start_timestamp).is_some());
+        assert!(get_value!(event.spans[1].timestamp).is_some());
+    }
+
+    #[test]
+    fn test_trim_false_contributes_to_budget() {
+        for span_id in ["short", "looooooooooooooooooooooooooong"] {
+            let original_span_id = SpanId(span_id.to_owned());
+            let original_description = "a".repeat(900000);
+
+            let mut event = Annotated::new(Event {
+                spans: Annotated::new(vec![Span {
+                    span_id: original_span_id.clone().into(),
+                    description: original_description.clone().into(),
+                    ..Default::default()
+                }
+                .into()]),
+                ..Default::default()
+            });
+
+            let mut processor = TrimmingProcessor::new();
+            processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();
+
+            assert_eq!(get_value!(event.spans[0].span_id!).as_ref(), span_id);
+
+            // The amount of trimming on the description depends on the length of the span id.
+            assert_eq!(
+                get_value!(event.spans[0].description!).len(),
+                1024 * 800 - 12 - span_id.len(),
+            );
+        }
     }
 }
diff --git a/relay-event-schema/src/processor/attrs.rs b/relay-event-schema/src/processor/attrs.rs
index bbd417a939f..c80a9fe7c63 100644
--- a/relay-event-schema/src/processor/attrs.rs
+++ b/relay-event-schema/src/processor/attrs.rs
@@ -128,7 +128,7 @@ pub struct FieldAttrs {
     pub pii: Pii,
     /// Whether additional properties should be retained during normalization.
     pub retain: bool,
-    /// `true` if the field is allowed to be trimmed.
+    /// Whether the trimming processor is allowed to shorten or drop this field.
     pub trim: bool,
 }
 
diff --git a/relay-event-schema/src/protocol/span.rs b/relay-event-schema/src/protocol/span.rs
index 032b9224294..33410bba2be 100644
--- a/relay-event-schema/src/protocol/span.rs
+++ b/relay-event-schema/src/protocol/span.rs
@@ -12,14 +12,14 @@ use crate::protocol::{
 
 #[derive(Clone, Debug, Default, PartialEq, Empty, FromValue, IntoValue, ProcessValue)]
 #[cfg_attr(feature = "jsonschema", derive(JsonSchema))]
-#[metastructure(process_func = "process_span", value_type = "Span", trim = "disabled")]
+#[metastructure(process_func = "process_span", value_type = "Span")]
 pub struct Span {
     /// Timestamp when the span was ended.
-    #[metastructure(required = "true")]
+    #[metastructure(required = "true", trim = "false")]
     pub timestamp: Annotated<Timestamp>,
 
     /// Timestamp when the span started.
-    #[metastructure(required = "true")]
+    #[metastructure(required = "true", trim = "false")]
     pub start_timestamp: Annotated<Timestamp>,
 
     /// The amount of time in milliseconds spent in this span,
@@ -27,27 +27,30 @@ pub struct Span {
     pub exclusive_time: Annotated<f64>,
 
     /// Span type (see `OperationType` docs).
-    #[metastructure(max_chars = 128)]
+    #[metastructure(max_chars = 128, trim = "false")]
     pub op: Annotated<OperationType>,
 
     /// The Span id.
-    #[metastructure(required = "true")]
+    #[metastructure(required = "true", trim = "false")]
     pub span_id: Annotated<SpanId>,
 
     /// The ID of the span enclosing this span.
+    #[metastructure(trim = "false")]
     pub parent_span_id: Annotated<SpanId>,
 
     /// The ID of the trace the span belongs to.
-    #[metastructure(required = "true")]
+    #[metastructure(required = "true", trim = "false")]
     pub trace_id: Annotated<TraceId>,
 
     /// A unique identifier for a segment within a trace (8 byte hexadecimal string).
     ///
     /// For spans embedded in transactions, the `segment_id` is the `span_id` of the containing
     /// transaction.
+    #[metastructure(trim = "false")]
     pub segment_id: Annotated<SpanId>,
 
     /// Whether or not the current span is the root of the segment.
+    #[metastructure(trim = "false")]
     pub is_segment: Annotated<bool>,
 
     /// The status of a span.