From 0ef6862274f789d7fbb66597e58bbaf58d9babdf Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 23 May 2024 13:59:05 +0200 Subject: [PATCH 01/28] feat(pii): Scrub key value pairs --- relay-pii/src/selector.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-pii/src/selector.rs b/relay-pii/src/selector.rs index 481c2876d7..4732249d4a 100644 --- a/relay-pii/src/selector.rs +++ b/relay-pii/src/selector.rs @@ -154,7 +154,7 @@ impl SelectorPathItem { /// A selector that can match paths of processing states. /// -/// To use a a selector, you most likely want to check whether it matches the path of a +/// To use a selector, you most likely want to check whether it matches the path of a /// [`ProcessingState`]. For this you turn the state into a [`Path`] using /// [`ProcessingState::path`] and call [`SelectorSpec::matches_path`], which will iterate through /// the path items in the processing state and check whether the selector matches. From b5c2d3ea895afb153e2b7954dfa7489deed4babd Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 May 2024 09:09:14 +0200 Subject: [PATCH 02/28] feat(pii): Scrub key value pairs --- relay-pii/src/processor.rs | 83 +++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 9e379db759..3a7bd01f6b 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -11,7 +11,7 @@ use relay_event_schema::processor::{ use relay_event_schema::protocol::{ AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User, }; -use relay_protocol::{Annotated, Meta, Remark, RemarkType, Value}; +use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value}; use crate::compiledconfig::{CompiledPiiConfig, RuleRef}; use crate::config::RuleType; @@ -101,6 +101,23 @@ impl<'a> Processor for PiiProcessor<'a> { self.apply_all_rules(meta, state, None) } + fn process_array( + &mut self, + value: &mut Array, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult + where + T: ProcessValue, + { + // Recurse into each element of the array. + value.process_child_values(self, state)?; + + println!("PROCESSING ARRAY {:?}", value); + + Ok(()) + } + fn process_string( &mut self, value: &mut String, @@ -474,6 +491,7 @@ mod tests { }; use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object}; use serde_json::json; + use std::collections::HashMap; use super::*; use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction}; @@ -1598,4 +1616,67 @@ mod tests { ], )"#); } + + #[test] + fn test_tuple_array_scrubbed() { + let config = serde_json::from_str::( + r##" + { + "applications": { + "$string": ["@anything:remove"] + } + } + "##, + ) + .unwrap(); + + let mut event = Event::from_value( + serde_json::json!( + { + "message": "hi", + "exception": { + "values": [ + { + "type": "BrokenException", + "value": "Something failed", + "stacktrace": { + "frames": [ + { + "vars": { + "headers": [ + ["authorization", "Bearer abc123"] + ] + } + } + ] + } + } + ] + } + }) + .into(), + ); + + let mut processor = PiiProcessor::new(config.compiled()); + process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); + + // let params = get_value!(event.logentry.params!); + // assert_debug_snapshot!(params, @r#"Array( + // [ + // Meta { + // remarks: [ + // Remark { + // ty: Removed, + // rule_id: "@anything:remove", + // range: None, + // }, + // ], + // errors: [], + // original_length: None, + // original_value: None, + // }, + // ], + // )"#); + // } + } } From 7aedc7ba1293202b8303935f9e077f3d26e315e8 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Fri, 24 May 2024 13:19:04 +0200 Subject: [PATCH 03/28] Add code --- Cargo.toml | 2 +- relay-event-schema/src/processor/impls.rs | 5 +++++ relay-event-schema/src/processor/traits.rs | 6 ++++++ relay-pii/src/processor.rs | 21 +++++++++++++++++---- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 052e2bacca..bd5b117022 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ resolver = "2" [profile.dev] # Debug information slows down the build and increases caches in the # target folder, but we don't require stack traces in most cases. -debug = false +#debug = false [profile.release] # In release, however, we do want full debug information to report diff --git a/relay-event-schema/src/processor/impls.rs b/relay-event-schema/src/processor/impls.rs index c36c34ddbb..3e61a6e907 100644 --- a/relay-event-schema/src/processor/impls.rs +++ b/relay-event-schema/src/processor/impls.rs @@ -9,6 +9,11 @@ use crate::processor::{ }; impl ProcessValue for String { + #[inline] + fn as_key(&self) -> Option<&str> { + Some(self.as_str()) + } + #[inline] fn value_type(&self) -> EnumSet { EnumSet::only(ValueType::String) diff --git a/relay-event-schema/src/processor/traits.rs b/relay-event-schema/src/processor/traits.rs index ac1a78728d..578a972297 100644 --- a/relay-event-schema/src/processor/traits.rs +++ b/relay-event-schema/src/processor/traits.rs @@ -139,6 +139,12 @@ pub use enumset::{enum_set, EnumSet}; /// A recursively processable value. pub trait ProcessValue: FromValue + IntoValue + Debug + Clone { + /// Returns an equivalent key representation of the value. + #[inline] + fn as_key(&self) -> Option<&str> { + None + } + /// Returns the type of the value. #[inline] fn value_type(&self) -> EnumSet { diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 3a7bd01f6b..e7717082b5 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -110,10 +110,21 @@ impl<'a> Processor for PiiProcessor<'a> { where T: ProcessValue, { - // Recurse into each element of the array. - value.process_child_values(self, state)?; + // If the array has length 2, we treat it as key-value pair and try to scrub it. If the + // scrubbing doesn't do anything, we try to scrub values individually. + if value.len() == 2 { + let key = value[0].clone(); + if let Some(key) = key.value() { + if let Some(key_name) = key.as_key() { + let entered = + state.enter_borrowed(key_name, state.inner_attrs(), key.value_type()); + value.process_child_values(self, &entered)?; + } + } + } - println!("PROCESSING ARRAY {:?}", value); + // Recurse into each element of the array in case the key-value scrubbing didn't scrub data. + value.process_child_values(self, state)?; Ok(()) } @@ -1623,7 +1634,7 @@ mod tests { r##" { "applications": { - "$string": ["@anything:remove"] + "$number": ["@anything:remove"] } } "##, @@ -1657,6 +1668,8 @@ mod tests { .into(), ); + println!("{:?}", event); + let mut processor = PiiProcessor::new(config.compiled()); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); From fd9a5b2f8304c055769908f66aec7cf5572b47b4 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 May 2024 08:40:59 +0200 Subject: [PATCH 04/28] Fix --- relay-event-schema/src/processor/impls.rs | 5 - relay-event-schema/src/processor/traits.rs | 6 -- relay-pii/src/processor.rs | 107 +++++++++++++++------ 3 files changed, 77 insertions(+), 41 deletions(-) diff --git a/relay-event-schema/src/processor/impls.rs b/relay-event-schema/src/processor/impls.rs index 3e61a6e907..c36c34ddbb 100644 --- a/relay-event-schema/src/processor/impls.rs +++ b/relay-event-schema/src/processor/impls.rs @@ -9,11 +9,6 @@ use crate::processor::{ }; impl ProcessValue for String { - #[inline] - fn as_key(&self) -> Option<&str> { - Some(self.as_str()) - } - #[inline] fn value_type(&self) -> EnumSet { EnumSet::only(ValueType::String) diff --git a/relay-event-schema/src/processor/traits.rs b/relay-event-schema/src/processor/traits.rs index 578a972297..ac1a78728d 100644 --- a/relay-event-schema/src/processor/traits.rs +++ b/relay-event-schema/src/processor/traits.rs @@ -139,12 +139,6 @@ pub use enumset::{enum_set, EnumSet}; /// A recursively processable value. pub trait ProcessValue: FromValue + IntoValue + Debug + Clone { - /// Returns an equivalent key representation of the value. - #[inline] - fn as_key(&self) -> Option<&str> { - None - } - /// Returns the type of the value. #[inline] fn value_type(&self) -> EnumSet { diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index e7717082b5..c58533b6d7 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -5,8 +5,8 @@ use std::sync::OnceLock; use regex::Regex; use relay_event_schema::processor::{ - self, enum_set, Chunk, Pii, ProcessValue, ProcessingAction, ProcessingResult, ProcessingState, - Processor, ValueType, + self, enum_set, process_value, Chunk, Pii, ProcessValue, ProcessingAction, ProcessingResult, + ProcessingState, Processor, ValueType, }; use relay_event_schema::protocol::{ AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User, @@ -104,7 +104,7 @@ impl<'a> Processor for PiiProcessor<'a> { fn process_array( &mut self, value: &mut Array, - meta: &mut Meta, + _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult where @@ -114,11 +114,15 @@ impl<'a> Processor for PiiProcessor<'a> { // scrubbing doesn't do anything, we try to scrub values individually. if value.len() == 2 { let key = value[0].clone(); - if let Some(key) = key.value() { - if let Some(key_name) = key.as_key() { - let entered = - state.enter_borrowed(key_name, state.inner_attrs(), key.value_type()); - value.process_child_values(self, &entered)?; + if let Some(key) = key.into_value() { + let key_value_type = key.value_type(); + if let Value::String(key_name) = key.into_value() { + let entered = state.enter_borrowed( + key_name.as_str(), + state.inner_attrs(), + key_value_type, + ); + process_value(&mut value[1], self, &entered)?; } } } @@ -502,7 +506,6 @@ mod tests { }; use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object}; use serde_json::json; - use std::collections::HashMap; use super::*; use crate::{DataScrubbingConfig, PiiConfig, ReplaceRedaction}; @@ -1634,7 +1637,7 @@ mod tests { r##" { "applications": { - "$number": ["@anything:remove"] + "exception.values.0.stacktrace.frames.0.vars.headers.0.authorization": ["@anything:replace"] } } "##, @@ -1668,28 +1671,72 @@ mod tests { .into(), ); - println!("{:?}", event); - let mut processor = PiiProcessor::new(config.compiled()); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); - // let params = get_value!(event.logentry.params!); - // assert_debug_snapshot!(params, @r#"Array( - // [ - // Meta { - // remarks: [ - // Remark { - // ty: Removed, - // rule_id: "@anything:remove", - // range: None, - // }, - // ], - // errors: [], - // original_length: None, - // original_value: None, - // }, - // ], - // )"#); - // } + let vars = event + .value() + .unwrap() + .exceptions + .value() + .unwrap() + .values + .value() + .unwrap()[0] + .value() + .unwrap() + .stacktrace + .value() + .unwrap() + .frames + .value() + .unwrap()[0] + .value() + .unwrap() + .vars + .value() + .unwrap(); + + assert_debug_snapshot!(vars, @r#" + FrameVars( + { + "headers": Array( + [ + Array( + [ + String( + "authorization", + ), + Annotated( + String( + "[Filtered]", + ), + Meta { + remarks: [ + Remark { + ty: Substituted, + rule_id: "@anything:replace", + range: Some( + ( + 0, + 10, + ), + ), + }, + ], + errors: [], + original_length: Some( + 13, + ), + original_value: None, + }, + ), + ], + ), + ], + ), + }, + ) + "#); } } From d1acc1ccec45ac45809e9188880ef6fd2961d561 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 May 2024 09:17:27 +0200 Subject: [PATCH 05/28] Fix --- CHANGELOG.md | 1 + Cargo.toml | 2 +- relay-pii/src/processor.rs | 126 ++++++++++++++++++++++++++++++++++--- 3 files changed, 118 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9428a2b43d..6418f523bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ **Features**: - Apply legacy inbound filters to standalone spans. ([#3552](https://github.com/getsentry/relay/pull/3552)) +- Allow treating arrays of two elements as key-value pairs during PII scrubbing. ([#3639](https://github.com/getsentry/relay/pull/3639)) **Internal**: diff --git a/Cargo.toml b/Cargo.toml index bd5b117022..052e2bacca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ resolver = "2" [profile.dev] # Debug information slows down the build and increases caches in the # target folder, but we don't require stack traces in most cases. -#debug = false +debug = false [profile.release] # In release, however, we do want full debug information to report diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index c58533b6d7..6ab3f31fd3 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -113,16 +113,25 @@ impl<'a> Processor for PiiProcessor<'a> { // If the array has length 2, we treat it as key-value pair and try to scrub it. If the // scrubbing doesn't do anything, we try to scrub values individually. if value.len() == 2 { - let key = value[0].clone(); - if let Some(key) = key.into_value() { - let key_value_type = key.value_type(); + if let Some(key) = value[0].clone().into_value() { if let Value::String(key_name) = key.into_value() { - let entered = state.enter_borrowed( - key_name.as_str(), - state.inner_attrs(), - key_value_type, - ); - process_value(&mut value[1], self, &entered)?; + if let Some(inner_value) = value[1].value() { + // We compute a new state which has the first element of the array as the + // key. + let entered = state.enter_borrowed( + key_name.as_str(), + state.inner_attrs(), + inner_value.value_type(), + ); + + let previous_meta = value[1].meta().clone(); + process_value(&mut value[1], self, &entered)?; + // We check whether meta has changed from empty to non-empty in order to + // understand if some rules matched downstream. + if previous_meta.is_empty() && !value[1].meta().is_empty() { + return Ok(()); + } + } } } } @@ -1632,7 +1641,7 @@ mod tests { } #[test] - fn test_tuple_array_scrubbed() { + fn test_tuple_array_scrubbed_with_path_selector() { let config = serde_json::from_str::( r##" { @@ -1739,4 +1748,101 @@ mod tests { ) "#); } + + #[test] + fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() { + let config = serde_json::from_str::( + r##" + { + "applications": { + "$string": ["@password:remove"] + } + } + "##, + ) + .unwrap(); + + let mut event = Event::from_value( + serde_json::json!( + { + "message": "hi", + "exception": { + "values": [ + { + "type": "BrokenException", + "value": "Something failed", + "stacktrace": { + "frames": [ + { + "vars": { + "headers": [ + ["authorization", "abc123"] + ] + } + } + ] + } + } + ] + } + }) + .into(), + ); + + let mut processor = PiiProcessor::new(config.compiled()); + process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); + + let vars = event + .value() + .unwrap() + .exceptions + .value() + .unwrap() + .values + .value() + .unwrap()[0] + .value() + .unwrap() + .stacktrace + .value() + .unwrap() + .frames + .value() + .unwrap()[0] + .value() + .unwrap() + .vars + .value() + .unwrap(); + + assert_debug_snapshot!(vars, @r###" + FrameVars( + { + "headers": Array( + [ + Array( + [ + String( + "authorization", + ), + Meta { + remarks: [ + Remark { + ty: Removed, + rule_id: "@password:remove", + range: None, + }, + ], + errors: [], + original_length: None, + original_value: None, + }, + ], + ), + ], + ), + }, + ) + "###); + } } From 0967300eaa5b5d1f2fca6b407b2bbeac6d74d116 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 May 2024 09:22:11 +0200 Subject: [PATCH 06/28] Fix --- relay-pii/src/processor.rs | 69 +++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 6ab3f31fd3..399fd4a2ff 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -507,7 +507,7 @@ fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec( + let configs = vec![ r##" { "applications": { @@ -1650,8 +1650,14 @@ mod tests { } } "##, - ) - .unwrap(); + r##" + { + "applications": { + "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"] + } + } + "##, + ]; let mut event = Event::from_value( serde_json::json!( @@ -1680,33 +1686,35 @@ mod tests { .into(), ); - let mut processor = PiiProcessor::new(config.compiled()); - process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); + for config in configs { + let config = serde_json::from_str::(config).unwrap(); + let mut processor = PiiProcessor::new(config.compiled()); + process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); - let vars = event - .value() - .unwrap() - .exceptions - .value() - .unwrap() - .values - .value() - .unwrap()[0] - .value() - .unwrap() - .stacktrace - .value() - .unwrap() - .frames - .value() - .unwrap()[0] - .value() - .unwrap() - .vars - .value() - .unwrap(); + let vars = event + .value() + .unwrap() + .exceptions + .value() + .unwrap() + .values + .value() + .unwrap()[0] + .value() + .unwrap() + .stacktrace + .value() + .unwrap() + .frames + .value() + .unwrap()[0] + .value() + .unwrap() + .vars + .value() + .unwrap(); - assert_debug_snapshot!(vars, @r#" + allow_duplicates!(assert_debug_snapshot!(vars, @r#" FrameVars( { "headers": Array( @@ -1746,7 +1754,8 @@ mod tests { ), }, ) - "#); + "#)); + } } #[test] From 0672c9c403026805ae658adc6bf8be3f74578066 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 May 2024 09:23:21 +0200 Subject: [PATCH 07/28] Fix --- relay-pii/src/processor.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 399fd4a2ff..45a3c5b912 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -1642,7 +1642,10 @@ mod tests { #[test] fn test_tuple_array_scrubbed_with_path_selector() { + // We expect that both of these configs express the same semantics. let configs = vec![ + // This configuration matches on the authorization element (the 1st element of the array + // represents the key). r##" { "applications": { @@ -1650,6 +1653,7 @@ mod tests { } } "##, + // This configuration matches on the 2nd element of the array. r##" { "applications": { From 49028593f0cc1f78eeb022b234b52666db9dd26c Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 May 2024 09:31:18 +0200 Subject: [PATCH 08/28] Update snapshot --- ...i__convert__tests__bearer_tokens_scrubbed.snap | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap index 162f954c18..12634a1f33 100644 --- a/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap @@ -13,7 +13,7 @@ expression: data "request": { "headers": [ [ - "[Filtered]", + "AuthToken", "[Filtered]" ] ] @@ -36,19 +36,6 @@ expression: data "request": { "headers": { "0": { - "0": { - "": { - "rem": [ - [ - "@password:filter", - "s", - 0, - 10 - ] - ], - "len": 9 - } - }, "1": { "": { "rem": [ From 901545fffabb83a7d9d19158d2476316e62a557a Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 May 2024 13:32:59 +0200 Subject: [PATCH 09/28] Change --- relay-pii/src/processor.rs | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 45a3c5b912..5acc623420 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -123,20 +123,14 @@ impl<'a> Processor for PiiProcessor<'a> { state.inner_attrs(), inner_value.value_type(), ); - - let previous_meta = value[1].meta().clone(); process_value(&mut value[1], self, &entered)?; - // We check whether meta has changed from empty to non-empty in order to - // understand if some rules matched downstream. - if previous_meta.is_empty() && !value[1].meta().is_empty() { - return Ok(()); - } } } } } - // Recurse into each element of the array in case the key-value scrubbing didn't scrub data. + // Recurse into each element of the array since we also want to individually scrub each + // value in the array. value.process_child_values(self, state)?; Ok(()) @@ -1835,8 +1829,29 @@ mod tests { [ Array( [ - String( - "authorization", + Annotated( + String( + "", + ), + Meta { + remarks: [ + Remark { + ty: Removed, + rule_id: "@password:remove", + range: Some( + ( + 0, + 0, + ), + ), + }, + ], + errors: [], + original_length: Some( + 13, + ), + original_value: None, + }, ), Meta { remarks: [ From e13348dc8a02aa1360e9d5282f0bacc4949480f8 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 May 2024 13:50:32 +0200 Subject: [PATCH 10/28] Change --- relay-pii/src/processor.rs | 72 ++++++++++++++------------------------ 1 file changed, 27 insertions(+), 45 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 5acc623420..1214219c4a 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -504,7 +504,7 @@ mod tests { use insta::{allow_duplicates, assert_debug_snapshot}; use relay_event_schema::processor::process_value; use relay_event_schema::protocol::{ - Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message, + Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, FrameVars, Headers, LogEntry, Message, NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext, }; use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object}; @@ -524,6 +524,30 @@ mod tests { rv } + fn extract_vars(event: Option<&Event>) -> &FrameVars { + event + .unwrap() + .exceptions + .value() + .unwrap() + .values + .value() + .unwrap()[0] + .value() + .unwrap() + .stacktrace + .value() + .unwrap() + .frames + .value() + .unwrap()[0] + .value() + .unwrap() + .vars + .value() + .unwrap() + } + #[test] fn test_scrub_original_value() { let mut data = Event::from_value( @@ -1689,28 +1713,7 @@ mod tests { let mut processor = PiiProcessor::new(config.compiled()); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); - let vars = event - .value() - .unwrap() - .exceptions - .value() - .unwrap() - .values - .value() - .unwrap()[0] - .value() - .unwrap() - .stacktrace - .value() - .unwrap() - .frames - .value() - .unwrap()[0] - .value() - .unwrap() - .vars - .value() - .unwrap(); + let vars = extract_vars(event.value()); allow_duplicates!(assert_debug_snapshot!(vars, @r#" FrameVars( @@ -1799,28 +1802,7 @@ mod tests { let mut processor = PiiProcessor::new(config.compiled()); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); - let vars = event - .value() - .unwrap() - .exceptions - .value() - .unwrap() - .values - .value() - .unwrap()[0] - .value() - .unwrap() - .stacktrace - .value() - .unwrap() - .frames - .value() - .unwrap()[0] - .value() - .unwrap() - .vars - .value() - .unwrap(); + let vars = extract_vars(event.value()); assert_debug_snapshot!(vars, @r###" FrameVars( From 8b446036ce29a0270e8af02e1e3d3a160201b1b1 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Mon, 27 May 2024 14:03:49 +0200 Subject: [PATCH 11/28] Change --- ...i__convert__tests__bearer_tokens_scrubbed.snap | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap index 12634a1f33..162f954c18 100644 --- a/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap @@ -13,7 +13,7 @@ expression: data "request": { "headers": [ [ - "AuthToken", + "[Filtered]", "[Filtered]" ] ] @@ -36,6 +36,19 @@ expression: data "request": { "headers": { "0": { + "0": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 9 + } + }, "1": { "": { "rem": [ From a99c1ae7fdd18eb5cd9533de19c31651308cafd8 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 28 May 2024 11:27:56 +0200 Subject: [PATCH 12/28] wip --- relay-pii/src/processor.rs | 437 +++++++++++++++++++++++++++++++++++-- 1 file changed, 414 insertions(+), 23 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 1214219c4a..1fe460bd20 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -11,7 +11,7 @@ use relay_event_schema::processor::{ use relay_event_schema::protocol::{ AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User, }; -use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value}; +use relay_protocol::{Annotated, Array, Meta, Object, Remark, RemarkType, Value}; use crate::compiledconfig::{CompiledPiiConfig, RuleRef}; use crate::config::RuleType; @@ -57,6 +57,391 @@ impl<'a> PiiProcessor<'a> { } } +struct TryPairlistVisitor{ + latest_key: Option, + result: Result)>, ()> +} // TODO: store references + +impl Processor for TryPairlistVisitor { + #[inline] + fn process_string( + &mut self, + s: &mut String, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + dbg!("visiting a string: ", &s); + + // let Ok(map) = &mut self.result else { return Ok(()) }; + + if dbg!(state.depth()) == 1 && dbg!(state.path().index()) == Some(0) { + dbg!("pushing a string: ", &s); + self.latest_key = Some(s.clone()); + } + Ok(()) + } + + + + #[inline] + fn process_array( + &mut self, + value: &mut relay_protocol::Array, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult + where + T: ProcessValue, + { + if value.len() != 2 { + self.result = Err(()); + return Ok(()); + } + + let key = value[0]; + // visit key: + process_value(&mut value[0], self, state); + match (self.latest_key, self.result) { + (Some(key), Ok(map)) => map.push((key, T::into_value(self))) + _ => { + self.result = Err(()); return Ok(()) + } + } + + Ok(()) + } + + // TODO: before_process early return + + #[inline] + fn process_object( + &mut self, + value: &mut relay_protocol::Object, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult + where + T: ProcessValue, + { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_pairlist( + &mut self, + value: &mut PairList, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult + where + T: ProcessValue, + T: AsPair, + { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_values( + &mut self, + value: &mut Values, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult + where + T: ProcessValue, + { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_timestamp( + &mut self, + value: &mut Timestamp, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_event( + &mut self, + value: &mut Event, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_replay( + &mut self, + value: &mut Replay, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_exception( + &mut self, + value: &mut Exception, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_raw_stacktrace( + &mut self, + value: &mut RawStacktrace, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_stacktrace( + &mut self, + value: &mut Stacktrace, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_frame( + &mut self, + value: &mut Frame, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_request( + &mut self, + value: &mut Request, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_user( + &mut self, + value: &mut User, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_client_sdk_info( + &mut self, + value: &mut ClientSdkInfo, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_debug_meta( + &mut self, + value: &mut DebugMeta, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_debug_image( + &mut self, + value: &mut DebugImage, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_geo( + &mut self, + value: &mut Geo, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_logentry( + &mut self, + value: &mut LogEntry, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_thread( + &mut self, + value: &mut Thread, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_context( + &mut self, + value: &mut Context, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_breadcrumb( + &mut self, + value: &mut Breadcrumb, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_template_info( + &mut self, + value: &mut TemplateInfo, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_header_name( + &mut self, + value: &mut HeaderName, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_span( + &mut self, + value: &mut Span, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_trace_context( + &mut self, + value: &mut TraceContext, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_native_image_path( + &mut self, + value: &mut NativeImagePath, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + #[inline] + fn process_contexts( + &mut self, + value: &mut Contexts, + meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + value.process_child_values(self, state)?; + Ok(()) + } + + fn process_other( + &mut self, + other: &mut relay_protocol::Object, + state: &ProcessingState<'_>, + ) -> ProcessingResult { + for (key, value) in other { + process_value( + value, + self, + &state.enter_borrowed( + key.as_str(), + state.inner_attrs(), + ValueType::for_field(value), + ), + )?; + } + + Ok(()) + } +} + +fn try_as_pairlist(array: &mut Array) -> Result, ()> { + let mut visitor = TryPairlistVisitor(Ok(BTreeMap::new())); + + array.process_child_values(&mut visitor, ProcessingState::root()); + match visitor.0 { + Ok(map) if !map.is_empty() => Ok(Object::from_iter( + map.into_iter().map(|(k, v)| (k.to_owned(), v.clone())), + )), + _ => Err(()), + } +} + impl<'a> Processor for PiiProcessor<'a> { fn before_process( &mut self, @@ -104,36 +489,42 @@ impl<'a> Processor for PiiProcessor<'a> { fn process_array( &mut self, value: &mut Array, - _meta: &mut Meta, + meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult where T: ProcessValue, { - // If the array has length 2, we treat it as key-value pair and try to scrub it. If the - // scrubbing doesn't do anything, we try to scrub values individually. - if value.len() == 2 { - if let Some(key) = value[0].clone().into_value() { - if let Value::String(key_name) = key.into_value() { - if let Some(inner_value) = value[1].value() { - // We compute a new state which has the first element of the array as the - // key. - let entered = state.enter_borrowed( - key_name.as_str(), - state.inner_attrs(), - inner_value.value_type(), - ); - process_value(&mut value[1], self, &entered)?; - } - } + match try_as_pairlist(value) { + Ok(mut obj) => { + self.process_pairlist(obj, meta, state) + // TODO: convert object back to array + } // TODO: enter_nothing? + Err(_) => { + // Recurse into each element of the array since we also want to individually scrub each + // value in the array. + value.process_child_values(self, state) } } - // Recurse into each element of the array since we also want to individually scrub each - // value in the array. - value.process_child_values(self, state)?; - - Ok(()) + // If the array has length 2, we treat it as key-value pair and try to scrub it. If the + // scrubbing doesn't do anything, we try to scrub values individually. + // if value.len() == 2 { + // if let Some(key) = value[0].clone().into_value() { + // if let Value::String(key_name) = key.into_value() { + // if let Some(inner_value) = value[1].value() { + // // We compute a new state which has the first element of the array as the + // // key. + // let entered = state.enter_borrowed( + // key_name.as_str(), + // state.inner_attrs(), + // inner_value.value_type(), + // ); + // process_value(&mut value[1], self, &entered)?; + // } + // } + // } + // } } fn process_string( From 399814f52d097cd67c1a389037825892d03b3da7 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 28 May 2024 12:30:10 +0200 Subject: [PATCH 13/28] wip --- relay-pii/src/processor.rs | 415 ++++--------------------------------- 1 file changed, 44 insertions(+), 371 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 1fe460bd20..229552bb63 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -57,389 +57,56 @@ impl<'a> PiiProcessor<'a> { } } -struct TryPairlistVisitor{ - latest_key: Option, - result: Result)>, ()> -} // TODO: store references - -impl Processor for TryPairlistVisitor { - #[inline] - fn process_string( - &mut self, - s: &mut String, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - dbg!("visiting a string: ", &s); - - // let Ok(map) = &mut self.result else { return Ok(()) }; - - if dbg!(state.depth()) == 1 && dbg!(state.path().index()) == Some(0) { - dbg!("pushing a string: ", &s); - self.latest_key = Some(s.clone()); - } - Ok(()) - } - - +#[derive(Default)] +struct IsObjectPair { + is_pair: bool, + has_string_key: bool, +} - #[inline] +impl Processor for IsObjectPair { fn process_array( &mut self, value: &mut relay_protocol::Array, - meta: &mut Meta, + _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult where T: ProcessValue, { - if value.len() != 2 { - self.result = Err(()); - return Ok(()); + self.is_pair = value.len() == 2; + if self.is_pair { + // Visit only key: + process_value(&mut value[0], self, state); } - - let key = value[0]; - // visit key: - process_value(&mut value[0], self, state); - match (self.latest_key, self.result) { - (Some(key), Ok(map)) => map.push((key, T::into_value(self))) - _ => { - self.result = Err(()); return Ok(()) - } - } - - Ok(()) - } - - // TODO: before_process early return - - #[inline] - fn process_object( - &mut self, - value: &mut relay_protocol::Object, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult - where - T: ProcessValue, - { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_pairlist( - &mut self, - value: &mut PairList, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult - where - T: ProcessValue, - T: AsPair, - { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_values( - &mut self, - value: &mut Values, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult - where - T: ProcessValue, - { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_timestamp( - &mut self, - value: &mut Timestamp, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_event( - &mut self, - value: &mut Event, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_replay( - &mut self, - value: &mut Replay, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; Ok(()) } - #[inline] - fn process_exception( - &mut self, - value: &mut Exception, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_raw_stacktrace( - &mut self, - value: &mut RawStacktrace, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_stacktrace( - &mut self, - value: &mut Stacktrace, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_frame( - &mut self, - value: &mut Frame, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_request( - &mut self, - value: &mut Request, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_user( - &mut self, - value: &mut User, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_client_sdk_info( - &mut self, - value: &mut ClientSdkInfo, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_debug_meta( - &mut self, - value: &mut DebugMeta, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_debug_image( - &mut self, - value: &mut DebugImage, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_geo( - &mut self, - value: &mut Geo, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_logentry( - &mut self, - value: &mut LogEntry, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_thread( - &mut self, - value: &mut Thread, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_context( - &mut self, - value: &mut Context, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_breadcrumb( - &mut self, - value: &mut Breadcrumb, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_template_info( - &mut self, - value: &mut TemplateInfo, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_header_name( - &mut self, - value: &mut HeaderName, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_span( - &mut self, - value: &mut Span, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_trace_context( - &mut self, - value: &mut TraceContext, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_native_image_path( - &mut self, - value: &mut NativeImagePath, - meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } - - #[inline] - fn process_contexts( + fn process_string( &mut self, - value: &mut Contexts, - meta: &mut Meta, + s: &mut String, + _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult where { - value.process_child_values(self, state)?; - Ok(()) - } + dbg!("visiting a string: ", &s); - fn process_other( - &mut self, - other: &mut relay_protocol::Object, - state: &ProcessingState<'_>, - ) -> ProcessingResult { - for (key, value) in other { - process_value( - value, - self, - &state.enter_borrowed( - key.as_str(), - state.inner_attrs(), - ValueType::for_field(value), - ), - )?; + if dbg!(state.depth()) == 1 && dbg!(state.path().index()) == Some(0) { + dbg!("pushing a string: ", &s); + self.has_string_key = true; } - Ok(()) } } -fn try_as_pairlist(array: &mut Array) -> Result, ()> { - let mut visitor = TryPairlistVisitor(Ok(BTreeMap::new())); - - array.process_child_values(&mut visitor, ProcessingState::root()); - match visitor.0 { - Ok(map) if !map.is_empty() => Ok(Object::from_iter( - map.into_iter().map(|(k, v)| (k.to_owned(), v.clone())), - )), - _ => Err(()), +fn is_pairlist(array: &mut Array) -> bool { + for element in array.iter_mut() { + let mut visitor = IsObjectPair::default(); + process_value(element, &mut visitor, ProcessingState::root()); + if !visitor.is_pair || !visitor.has_string_key { + return false; + } } + + !array.is_empty() } impl<'a> Processor for PiiProcessor<'a> { @@ -488,24 +155,30 @@ impl<'a> Processor for PiiProcessor<'a> { fn process_array( &mut self, - value: &mut Array, + array: &mut Array, meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult where T: ProcessValue, { - match try_as_pairlist(value) { - Ok(mut obj) => { - self.process_pairlist(obj, meta, state) - // TODO: convert object back to array - } // TODO: enter_nothing? - Err(_) => { - // Recurse into each element of the array since we also want to individually scrub each - // value in the array. - value.process_child_values(self, state) - } + if is_pairlist(array) { + for element in array {} + Ok(()) + } else { + // default treatment. + array.process_child_values(self, state) } + // match try_as_pairlist(value) { + // Ok(mut obj) => { + // self.process_pairlist(obj, meta, state) + // // TODO: convert object back to array + // } // TODO: enter_nothing? + // Err(_) => { + // // Recurse into each element of the array since we also want to individually scrub each + // // value in the array. + // } + // } // If the array has length 2, we treat it as key-value pair and try to scrub it. If the // scrubbing doesn't do anything, we try to scrub values individually. From a2d78680de9926831a89cd8f69c4d11fa716a785 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 28 May 2024 13:32:10 +0200 Subject: [PATCH 14/28] test --- relay-pii/src/processor.rs | 192 +++++++++++++++++++++---------------- 1 file changed, 108 insertions(+), 84 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 229552bb63..7c84e628d4 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -11,7 +11,7 @@ use relay_event_schema::processor::{ use relay_event_schema::protocol::{ AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User, }; -use relay_protocol::{Annotated, Array, Meta, Object, Remark, RemarkType, Value}; +use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value}; use crate::compiledconfig::{CompiledPiiConfig, RuleRef}; use crate::config::RuleType; @@ -57,58 +57,6 @@ impl<'a> PiiProcessor<'a> { } } -#[derive(Default)] -struct IsObjectPair { - is_pair: bool, - has_string_key: bool, -} - -impl Processor for IsObjectPair { - fn process_array( - &mut self, - value: &mut relay_protocol::Array, - _meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult - where - T: ProcessValue, - { - self.is_pair = value.len() == 2; - if self.is_pair { - // Visit only key: - process_value(&mut value[0], self, state); - } - Ok(()) - } - - fn process_string( - &mut self, - s: &mut String, - _meta: &mut Meta, - state: &ProcessingState<'_>, - ) -> ProcessingResult where { - dbg!("visiting a string: ", &s); - - if dbg!(state.depth()) == 1 && dbg!(state.path().index()) == Some(0) { - dbg!("pushing a string: ", &s); - self.has_string_key = true; - } - Ok(()) - } -} - -fn is_pairlist(array: &mut Array) -> bool { - for element in array.iter_mut() { - let mut visitor = IsObjectPair::default(); - process_value(element, &mut visitor, ProcessingState::root()); - if !visitor.is_pair || !visitor.has_string_key { - return false; - } - } - - !array.is_empty() -} - impl<'a> Processor for PiiProcessor<'a> { fn before_process( &mut self, @@ -156,48 +104,42 @@ impl<'a> Processor for PiiProcessor<'a> { fn process_array( &mut self, array: &mut Array, - meta: &mut Meta, + _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult where T: ProcessValue, { if is_pairlist(array) { - for element in array {} + for annotated in array { + let mut mapped = std::mem::take(annotated).map_value(T::into_value); + if let Some(Value::Array(ref mut pair)) = mapped.value_mut() { + let mut value = std::mem::take(&mut pair[1]); + + let value_type = ValueType::for_field(&pair[1]); + // BEGIN copied from process_pairlist + if let Some(key_name) = &pair[0].as_str() { + // if the pair has no key name, we skip over it for PII stripping. It is + // still processed with index-based path in the invocation of + // `process_child_values`. + let entered = + state.enter_borrowed(key_name, state.inner_attrs(), value_type); + + processor::process_value(&mut value, self, &entered)?; + } + // END copied from process_pairlist + + // Put value back into pair + pair[1] = value; + } + // Put pair back into array + *annotated = T::from_value(mapped); + } Ok(()) } else { // default treatment. array.process_child_values(self, state) } - // match try_as_pairlist(value) { - // Ok(mut obj) => { - // self.process_pairlist(obj, meta, state) - // // TODO: convert object back to array - // } // TODO: enter_nothing? - // Err(_) => { - // // Recurse into each element of the array since we also want to individually scrub each - // // value in the array. - // } - // } - - // If the array has length 2, we treat it as key-value pair and try to scrub it. If the - // scrubbing doesn't do anything, we try to scrub values individually. - // if value.len() == 2 { - // if let Some(key) = value[0].clone().into_value() { - // if let Value::String(key_name) = key.into_value() { - // if let Some(inner_value) = value[1].value() { - // // We compute a new state which has the first element of the array as the - // // key. - // let entered = state.enter_borrowed( - // key_name.as_str(), - // state.inner_attrs(), - // inner_value.value_type(), - // ); - // process_value(&mut value[1], self, &entered)?; - // } - // } - // } - // } } fn process_string( @@ -301,6 +243,60 @@ impl<'a> Processor for PiiProcessor<'a> { } } +#[derive(Default)] +struct IsObjectPair { + is_pair: bool, + has_string_key: bool, +} + +impl Processor for IsObjectPair { + fn process_array( + &mut self, + value: &mut relay_protocol::Array, + _meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult + where + T: ProcessValue, + { + self.is_pair = value.len() == 2; + if self.is_pair { + // Visit only key: + let value_type = ValueType::for_field(&value[0]); + process_value( + &mut value[0], + self, + &state.enter_index(0, state.inner_attrs(), value_type), + )?; + } + Ok(()) + } + + fn process_string( + &mut self, + _value: &mut String, + _meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + if state.depth() == 1 && state.path().index() == Some(0) { + self.has_string_key = true; + } + Ok(()) + } +} + +fn is_pairlist(array: &mut Array) -> bool { + for element in array.iter_mut() { + let mut visitor = IsObjectPair::default(); + process_value(element, &mut visitor, ProcessingState::root()).ok(); + if !visitor.is_pair || !visitor.has_string_key { + return false; + } + } + + !array.is_empty() +} + /// Scrubs GraphQL variables from the event. pub fn scrub_graphql(event: &mut Event) { let mut keys: BTreeSet<&str> = BTreeSet::new(); @@ -1722,6 +1718,34 @@ mod tests { )"#); } + #[test] + fn test_is_pairlist() { + for (case, expected) in [ + (r#"[]"#, false), + (r#"["foo"]"#, false), + (r#"["foo", 123]"#, false), + (r#"[[1, "foo"]]"#, false), + (r#"[[["too_nested", 123]]]"#, false), + (r#"[["foo", "bar"], [1, "foo"]]"#, false), + (r#"[["foo", "bar"], ["foo", "bar", "baz"]]"#, false), + (r#"[["foo", "bar", "baz"], ["foo", "bar"]]"#, false), + (r#"["foo", ["bar", "baz"], ["foo", "bar"]]"#, false), + (r#"[["foo", "bar"], [["too_nested", 123]]]"#, false), + (r#"[["foo", 123]]"#, true), + (r#"[["foo", "bar"]]"#, true), + ( + r#"[["foo", "bar"], ["foo", {"nested": {"something": 1}}]]"#, + true, + ), + ] { + let v = Annotated::::from_json(case).unwrap(); + let Annotated(Some(Value::Array(mut a)), _) = v else { + panic!() + }; + assert_eq!(is_pairlist(&mut a), expected, "{}", case); + } + } + #[test] fn test_tuple_array_scrubbed_with_path_selector() { // We expect that both of these configs express the same semantics. From 8bfc141cf7c8f04483bac5d0b9b4fdc5deed28f9 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 May 2024 16:08:46 +0200 Subject: [PATCH 15/28] Fix --- relay-pii/src/processor.rs | 82 +++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 7c84e628d4..7853ac6d99 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -111,33 +111,39 @@ impl<'a> Processor for PiiProcessor<'a> { T: ProcessValue, { if is_pairlist(array) { - for annotated in array { - let mut mapped = std::mem::take(annotated).map_value(T::into_value); + for (index, annotated) in array.iter_mut().enumerate() { + let mut mapped = mem::take(annotated).map_value(T::into_value); + if let Some(Value::Array(ref mut pair)) = mapped.value_mut() { - let mut value = std::mem::take(&mut pair[1]); + let mut value = mem::take(&mut pair[1]); + let value_type = ValueType::for_field(&value); - let value_type = ValueType::for_field(&pair[1]); - // BEGIN copied from process_pairlist if let Some(key_name) = &pair[0].as_str() { - // if the pair has no key name, we skip over it for PII stripping. It is - // still processed with index-based path in the invocation of - // `process_child_values`. - let entered = - state.enter_borrowed(key_name, state.inner_attrs(), value_type); - - processor::process_value(&mut value, self, &entered)?; + // We enter the index of the array. + let index_state = state.enter_index(index, state.inner_attrs(), value_type); + // We enter the key of the first element of the array, since we treat it + // as a pair. + let key_state = index_state.enter_borrowed( + key_name, + index_state.inner_attrs(), + value_type, + ); + // We process the value with a state that "simulates" the first value of the + // array as if it was the key of a dictionary. + process_value(&mut value, self, &key_state)?; } - // END copied from process_pairlist - // Put value back into pair + // Put value back into pair. pair[1] = value; } - // Put pair back into array + + // Put pair back into array. *annotated = T::from_value(mapped); } + Ok(()) } else { - // default treatment. + // If we didn't find a pairlist, we can process child values as normal. array.process_child_values(self, state) } } @@ -244,12 +250,20 @@ impl<'a> Processor for PiiProcessor<'a> { } #[derive(Default)] -struct IsObjectPair { +struct PairArrayChecker { is_pair: bool, has_string_key: bool, } -impl Processor for IsObjectPair { +impl PairArrayChecker { + /// Returns true if the visitor identified the supplied data as an array composed of + /// a key (string) and a value. + fn is_pair_array(&self) -> bool { + self.is_pair && self.has_string_key + } +} + +impl Processor for PairArrayChecker { fn process_array( &mut self, value: &mut relay_protocol::Array, @@ -259,9 +273,8 @@ impl Processor for IsObjectPair { where T: ProcessValue, { - self.is_pair = value.len() == 2; + self.is_pair = state.depth() == 0 && value.len() == 2; if self.is_pair { - // Visit only key: let value_type = ValueType::for_field(&value[0]); process_value( &mut value[0], @@ -287,9 +300,9 @@ impl Processor for IsObjectPair { fn is_pairlist(array: &mut Array) -> bool { for element in array.iter_mut() { - let mut visitor = IsObjectPair::default(); + let mut visitor = PairArrayChecker::default(); process_value(element, &mut visitor, ProcessingState::root()).ok(); - if !visitor.is_pair || !visitor.has_string_key { + if !visitor.is_pair_array() { return false; } } @@ -1899,29 +1912,8 @@ mod tests { [ Array( [ - Annotated( - String( - "", - ), - Meta { - remarks: [ - Remark { - ty: Removed, - rule_id: "@password:remove", - range: Some( - ( - 0, - 0, - ), - ), - }, - ], - errors: [], - original_length: Some( - 13, - ), - original_value: None, - }, + String( + "authorization", ), Meta { remarks: [ From 7a2c98ebae20bfccdabbb567431b7736d9d2194f Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 May 2024 16:16:47 +0200 Subject: [PATCH 16/28] Fix --- ...i__convert__tests__bearer_tokens_scrubbed.snap | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap index 162f954c18..12634a1f33 100644 --- a/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap @@ -13,7 +13,7 @@ expression: data "request": { "headers": [ [ - "[Filtered]", + "AuthToken", "[Filtered]" ] ] @@ -36,19 +36,6 @@ expression: data "request": { "headers": { "0": { - "0": { - "": { - "rem": [ - [ - "@password:filter", - "s", - 0, - 10 - ] - ], - "len": 9 - } - }, "1": { "": { "rem": [ From 0d40b4c6fcca8129a946ee04fbea7f09d9ed4937 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 May 2024 16:42:20 +0200 Subject: [PATCH 17/28] Fix --- relay-pii/src/processor.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 7853ac6d99..31f4ecd1e3 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -119,15 +119,10 @@ impl<'a> Processor for PiiProcessor<'a> { let value_type = ValueType::for_field(&value); if let Some(key_name) = &pair[0].as_str() { - // We enter the index of the array. - let index_state = state.enter_index(index, state.inner_attrs(), value_type); // We enter the key of the first element of the array, since we treat it // as a pair. - let key_state = index_state.enter_borrowed( - key_name, - index_state.inner_attrs(), - value_type, - ); + let key_state = + state.enter_borrowed(key_name, state.inner_attrs(), value_type); // We process the value with a state that "simulates" the first value of the // array as if it was the key of a dictionary. process_value(&mut value, self, &key_state)?; @@ -1768,7 +1763,7 @@ mod tests { r##" { "applications": { - "exception.values.0.stacktrace.frames.0.vars.headers.0.authorization": ["@anything:replace"] + "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"] } } "##, From b3194c9f0516a03ebcb19e158cde57ad00940464 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 May 2024 16:49:26 +0200 Subject: [PATCH 18/28] Fix --- relay-pii/src/convert.rs | 41 ++++++++++ ...ii__convert__tests__pairlist_scrubbed.snap | 82 +++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed.snap diff --git a/relay-pii/src/convert.rs b/relay-pii/src/convert.rs index eadbf44497..88dfdfb892 100644 --- a/relay-pii/src/convert.rs +++ b/relay-pii/src/convert.rs @@ -1656,4 +1656,45 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); assert_annotated_snapshot!(data); } + + #[test] + fn test_pairlist_scrubbed() { + let mut data = Event::from_value( + serde_json::json!({ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "secret", + "A1BBC234QWERTY0987MNBV012765HJKL" + ], + [ + "passwd", + "my_password" + ] + ] + } + } + } + + ] + } + } + ] + } + }) + .into(), + ); + + let pii_config = simple_enabled_pii_config(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); + assert_annotated_snapshot!(data); + } } diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed.snap new file mode 100644 index 0000000000..937416f08e --- /dev/null +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed.snap @@ -0,0 +1,82 @@ +--- +source: relay-pii/src/convert.rs +expression: data +--- +{ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "secret", + "[Filtered]" + ], + [ + "passwd", + "[Filtered]" + ] + ] + } + } + } + ] + } + } + ] + }, + "_meta": { + "threads": { + "values": { + "0": { + "stacktrace": { + "frames": { + "0": { + "vars": { + "request": { + "headers": { + "0": { + "1": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 32 + } + } + }, + "1": { + "1": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 11 + } + } + } + } + } + } + } + } + } + } + } + } + } +} From 53e68be64b9eafcdda5413dcc468215b7740b8a6 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 May 2024 16:51:57 +0200 Subject: [PATCH 19/28] Fix --- relay-pii/src/processor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 31f4ecd1e3..c6ae8eefde 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -111,7 +111,7 @@ impl<'a> Processor for PiiProcessor<'a> { T: ProcessValue, { if is_pairlist(array) { - for (index, annotated) in array.iter_mut().enumerate() { + for annotated in array { let mut mapped = mem::take(annotated).map_value(T::into_value); if let Some(Value::Array(ref mut pair)) = mapped.value_mut() { From 684b095204a5421053eae4e86f143227e2ea058c Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 May 2024 16:56:16 +0200 Subject: [PATCH 20/28] Update relay-pii/src/processor.rs Co-authored-by: David Herberth --- relay-pii/src/processor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index c6ae8eefde..946286bb4f 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -1750,7 +1750,7 @@ mod tests { let Annotated(Some(Value::Array(mut a)), _) = v else { panic!() }; - assert_eq!(is_pairlist(&mut a), expected, "{}", case); + assert_eq!(is_pairlist(&mut a), expected, "{case}"); } } From 48aef40ef83056795e81a41b3c9218090d8b967a Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 May 2024 17:01:13 +0200 Subject: [PATCH 21/28] Fix --- relay-pii/src/processor.rs | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index c6ae8eefde..11b7c1f661 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -592,30 +592,6 @@ mod tests { rv } - fn extract_vars(event: Option<&Event>) -> &FrameVars { - event - .unwrap() - .exceptions - .value() - .unwrap() - .values - .value() - .unwrap()[0] - .value() - .unwrap() - .stacktrace - .value() - .unwrap() - .frames - .value() - .unwrap()[0] - .value() - .unwrap() - .vars - .value() - .unwrap() - } - #[test] fn test_scrub_original_value() { let mut data = Event::from_value( @@ -1809,7 +1785,7 @@ mod tests { let mut processor = PiiProcessor::new(config.compiled()); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); - let vars = extract_vars(event.value()); + let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap(); allow_duplicates!(assert_debug_snapshot!(vars, @r#" FrameVars( @@ -1898,7 +1874,7 @@ mod tests { let mut processor = PiiProcessor::new(config.compiled()); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); - let vars = extract_vars(event.value()); + let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap(); assert_debug_snapshot!(vars, @r###" FrameVars( From 123f6bea506e7a8a883d2d084744db8b2b278486 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Tue, 28 May 2024 17:05:05 +0200 Subject: [PATCH 22/28] Fix --- relay-pii/src/processor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 18ba6a6eaa..6de633c344 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -572,7 +572,7 @@ mod tests { use insta::{allow_duplicates, assert_debug_snapshot}; use relay_event_schema::processor::process_value; use relay_event_schema::protocol::{ - Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, FrameVars, Headers, LogEntry, Message, + Addr, Breadcrumb, DebugImage, DebugMeta, ExtraValue, Headers, LogEntry, Message, NativeDebugImage, Request, Span, TagEntry, Tags, TraceContext, }; use relay_protocol::{assert_annotated_snapshot, get_value, FromValue, Object}; From 24d772b3d7881efacddc49adebc06e28c5e544df Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Wed, 5 Jun 2024 10:25:42 +0200 Subject: [PATCH 23/28] Fix --- relay-pii/src/processor.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 6de633c344..5ccc680886 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -245,23 +245,23 @@ impl<'a> Processor for PiiProcessor<'a> { } #[derive(Default)] -struct PairArrayChecker { +struct PairListProcessor { is_pair: bool, has_string_key: bool, } -impl PairArrayChecker { - /// Returns true if the visitor identified the supplied data as an array composed of +impl PairListProcessor { + /// Returns true if the processor identified the supplied data as an array composed of /// a key (string) and a value. fn is_pair_array(&self) -> bool { self.is_pair && self.has_string_key } } -impl Processor for PairArrayChecker { +impl Processor for PairListProcessor { fn process_array( &mut self, - value: &mut relay_protocol::Array, + value: &mut Array, _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult @@ -277,6 +277,7 @@ impl Processor for PairArrayChecker { &state.enter_index(0, state.inner_attrs(), value_type), )?; } + Ok(()) } @@ -289,13 +290,14 @@ impl Processor for PairArrayChecker { if state.depth() == 1 && state.path().index() == Some(0) { self.has_string_key = true; } + Ok(()) } } fn is_pairlist(array: &mut Array) -> bool { for element in array.iter_mut() { - let mut visitor = PairArrayChecker::default(); + let mut visitor = PairListProcessor::default(); process_value(element, &mut visitor, ProcessingState::root()).ok(); if !visitor.is_pair_array() { return false; From 815a230ab860ff5e3f6ba2bba591ef8f0e6a2413 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 6 Jun 2024 16:04:58 +0200 Subject: [PATCH 24/28] Update CHANGELOG.md Co-authored-by: Joris Bayer --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ea25ac5ff..0469c95242 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ **Internal**: -- Allow treating arrays of two elements as key-value pairs during PII scrubbing. ([#3639](https://github.com/getsentry/relay/pull/3639)) +- Treat arrays of pairs as key-value mappings during PII scrubbing. ([#3639](https://github.com/getsentry/relay/pull/3639)) ## 24.5.1 From 26842af607857bdb58f377ea56633ef3388487ed Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 6 Jun 2024 16:07:39 +0200 Subject: [PATCH 25/28] Fix --- relay-pii/src/processor.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 5ccc680886..26fac7787d 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -270,11 +270,11 @@ impl Processor for PairListProcessor { { self.is_pair = state.depth() == 0 && value.len() == 2; if self.is_pair { - let value_type = ValueType::for_field(&value[0]); + let key_type = ValueType::for_field(&value[0]); process_value( &mut value[0], self, - &state.enter_index(0, state.inner_attrs(), value_type), + &state.enter_index(0, state.inner_attrs(), key_type), )?; } From eff37fe780fce8d8ed024c21ef083fe87f8447c3 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 6 Jun 2024 16:41:42 +0200 Subject: [PATCH 26/28] Fix --- relay-pii/src/convert.rs | 49 +++++++++++++- ...airlist_scrubbed_with_matching_values.snap | 67 +++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_values.snap diff --git a/relay-pii/src/convert.rs b/relay-pii/src/convert.rs index 88dfdfb892..f2243c2e91 100644 --- a/relay-pii/src/convert.rs +++ b/relay-pii/src/convert.rs @@ -1658,7 +1658,7 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv } #[test] - fn test_pairlist_scrubbed() { + fn test_pairlist_scrubbed_with_matching_keys() { let mut data = Event::from_value( serde_json::json!({ "threads": { @@ -1672,10 +1672,12 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv "headers": [ [ "secret", + // Should be filtered because of key `secret`. "A1BBC234QWERTY0987MNBV012765HJKL" ], [ "passwd", + // Should be filtered because of key `passwd`. "my_password" ] ] @@ -1697,4 +1699,49 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); assert_annotated_snapshot!(data); } + + #[test] + fn test_pairlist_scrubbed_with_matching_values() { + let mut data = Event::from_value( + serde_json::json!({ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "some_random_value", + // Should be filtered because it's treated + // as individual value. + "my_password" + ], + [ + "some_random_value_2", + // Should not be filtered because the value + // is scraped by default. + "abc" + ] + ] + } + } + } + + ] + } + } + ] + } + }) + .into(), + ); + + let pii_config = simple_enabled_pii_config(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); + assert_annotated_snapshot!(data); + } } diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_values.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_values.snap new file mode 100644 index 0000000000..27477e18e8 --- /dev/null +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_values.snap @@ -0,0 +1,67 @@ +--- +source: relay-pii/src/convert.rs +expression: data +--- +{ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "some_random_value", + "[Filtered]" + ], + [ + "some_random_value_2", + "abc" + ] + ] + } + } + } + ] + } + } + ] + }, + "_meta": { + "threads": { + "values": { + "0": { + "stacktrace": { + "frames": { + "0": { + "vars": { + "request": { + "headers": { + "0": { + "1": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 11 + } + } + } + } + } + } + } + } + } + } + } + } + } +} From 685cf8348900d66df03c5264b65b1598b27e67a6 Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 6 Jun 2024 16:42:17 +0200 Subject: [PATCH 27/28] Fix --- ..._pairlist_scrubbed_with_matching_keys.snap | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_keys.snap diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_keys.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_keys.snap new file mode 100644 index 0000000000..937416f08e --- /dev/null +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_keys.snap @@ -0,0 +1,82 @@ +--- +source: relay-pii/src/convert.rs +expression: data +--- +{ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "secret", + "[Filtered]" + ], + [ + "passwd", + "[Filtered]" + ] + ] + } + } + } + ] + } + } + ] + }, + "_meta": { + "threads": { + "values": { + "0": { + "stacktrace": { + "frames": { + "0": { + "vars": { + "request": { + "headers": { + "0": { + "1": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 32 + } + } + }, + "1": { + "1": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 11 + } + } + } + } + } + } + } + } + } + } + } + } + } +} From d4af9b6b42fbc9657577327806bdded61141405e Mon Sep 17 00:00:00 2001 From: Riccardo Busetti Date: Thu, 6 Jun 2024 16:43:13 +0200 Subject: [PATCH 28/28] Fix --- ...ii__convert__tests__pairlist_scrubbed.snap | 82 ------------------- 1 file changed, 82 deletions(-) delete mode 100644 relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed.snap diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed.snap deleted file mode 100644 index 937416f08e..0000000000 --- a/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed.snap +++ /dev/null @@ -1,82 +0,0 @@ ---- -source: relay-pii/src/convert.rs -expression: data ---- -{ - "threads": { - "values": [ - { - "stacktrace": { - "frames": [ - { - "vars": { - "request": { - "headers": [ - [ - "secret", - "[Filtered]" - ], - [ - "passwd", - "[Filtered]" - ] - ] - } - } - } - ] - } - } - ] - }, - "_meta": { - "threads": { - "values": { - "0": { - "stacktrace": { - "frames": { - "0": { - "vars": { - "request": { - "headers": { - "0": { - "1": { - "": { - "rem": [ - [ - "@password:filter", - "s", - 0, - 10 - ] - ], - "len": 32 - } - } - }, - "1": { - "1": { - "": { - "rem": [ - [ - "@password:filter", - "s", - 0, - 10 - ] - ], - "len": 11 - } - } - } - } - } - } - } - } - } - } - } - } - } -}