From c9bd032c90b4ffb48d1a5cb01b8ecf1dff30991d Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 7 Dec 2022 22:41:52 -0500 Subject: [PATCH] feat(profiling): Expand the discard reasons --- relay-profiling/src/android.rs | 2 +- relay-profiling/src/cocoa.rs | 2 +- relay-profiling/src/error.rs | 8 +- relay-profiling/src/lib.rs | 4 +- relay-profiling/src/outcomes.rs | 44 ++++++++++ relay-profiling/src/python.rs | 2 +- relay-profiling/src/rust.rs | 2 +- relay-profiling/src/sample.rs | 96 +++++++++++++++++++++- relay-profiling/src/typescript.rs | 4 +- relay-server/src/actors/outcome.rs | 15 ++-- relay-server/src/actors/processor.rs | 117 +++++++++++++++++++++++++-- 11 files changed, 266 insertions(+), 30 deletions(-) create mode 100644 relay-profiling/src/outcomes.rs diff --git a/relay-profiling/src/android.rs b/relay-profiling/src/android.rs index b1f3dce5126..8ec2cf9154c 100644 --- a/relay-profiling/src/android.rs +++ b/relay-profiling/src/android.rs @@ -258,7 +258,7 @@ fn get_timestamp(clock: Clock, start_time: DateTime, event_time: Time) -> u fn parse_android_profile(payload: &[u8]) -> Result { let mut profile: AndroidProfile = - serde_json::from_slice(payload).map_err(ProfileError::InvalidJson)?; + serde_json::from_slice(payload).map_err(ProfileError::InvalidJSON)?; if profile.transactions.is_empty() && !profile.has_transaction_metadata() { return Err(ProfileError::NoTransactionAssociated); diff --git a/relay-profiling/src/cocoa.rs b/relay-profiling/src/cocoa.rs index 787be7e557e..c4dec78ba1c 100644 --- a/relay-profiling/src/cocoa.rs +++ b/relay-profiling/src/cocoa.rs @@ -185,7 +185,7 @@ pub fn expand_cocoa_profile(payload: &[u8]) -> Result>, ProfileError fn parse_cocoa_profile(payload: &[u8]) -> Result { let mut profile: CocoaProfile = - serde_json::from_slice(payload).map_err(ProfileError::InvalidJson)?; + serde_json::from_slice(payload).map_err(ProfileError::InvalidJSON)?; profile.remove_single_samples_per_thread(); diff --git a/relay-profiling/src/error.rs b/relay-profiling/src/error.rs index 2a7afdc8daa..1616c636b33 100644 --- a/relay-profiling/src/error.rs +++ b/relay-profiling/src/error.rs @@ -2,8 +2,8 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum ProfileError { - #[error("invalid json in profile")] - InvalidJson(#[source] serde_json::Error), + #[error(transparent)] + InvalidJSON(#[from] serde_json::Error), #[error("invalid base64 value")] InvalidBase64Value, #[error("invalid sampled profile")] @@ -20,4 +20,8 @@ pub enum ProfileError { InvalidTransactionMetadata, #[error("missing profile metadata")] MissingProfileMetadata, + #[error("malformed stacks")] + MalformedStacks, + #[error("malformed samples")] + MalformedSamples, } diff --git a/relay-profiling/src/lib.rs b/relay-profiling/src/lib.rs index 695ab1c77c7..f092e0f53f4 100644 --- a/relay-profiling/src/lib.rs +++ b/relay-profiling/src/lib.rs @@ -100,6 +100,7 @@ mod cocoa; mod error; mod measurements; mod native_debug_image; +mod outcomes; mod python; mod rust; mod sample; @@ -115,6 +116,7 @@ use crate::sample::{expand_sample_profile, Version}; use crate::typescript::parse_typescript_profile; pub use crate::error::ProfileError; +pub use crate::outcomes::{discard_reason, DiscardReason}; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "lowercase")] @@ -135,7 +137,7 @@ struct MinimalProfile { } fn minimal_profile_from_json(data: &[u8]) -> Result { - serde_json::from_slice(data).map_err(ProfileError::InvalidJson) + serde_json::from_slice(data).map_err(ProfileError::InvalidJSON) } pub fn expand_profile(payload: &[u8]) -> Result>, ProfileError> { diff --git a/relay-profiling/src/outcomes.rs b/relay-profiling/src/outcomes.rs new file mode 100644 index 00000000000..3aa5cfe70c7 --- /dev/null +++ b/relay-profiling/src/outcomes.rs @@ -0,0 +1,44 @@ +use crate::ProfileError; + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +#[allow(dead_code)] +pub enum DiscardReason { + FailedSerialization, + InvalidJSON, + InvalidProfileMetadata, + InvalidTransactionMetadata, + MalformedSamples, + MalformedStacks, + NoTransactionAssociated, + NotEnoughSamples, + Unknown, +} + +impl DiscardReason { + pub fn name(self) -> &'static str { + match self { + DiscardReason::FailedSerialization => "profiling_failed_serialization", + DiscardReason::InvalidJSON => "profiling_invalid_json", + DiscardReason::InvalidProfileMetadata => "profiling_invalid_profile_metadata", + DiscardReason::InvalidTransactionMetadata => "profiling_invalid_transaction_metadata", + DiscardReason::MalformedSamples => "profiling_malformed_samples", + DiscardReason::MalformedStacks => "profiling_malformed_stacks", + DiscardReason::NoTransactionAssociated => "profiling_no_transaction_associated", + DiscardReason::NotEnoughSamples => "profiling_not_enough_samples", + DiscardReason::Unknown => "profiling_unknown", + } + } +} + +pub fn discard_reason(err: ProfileError) -> DiscardReason { + match err { + ProfileError::CannotSerializePayload => DiscardReason::FailedSerialization, + ProfileError::NotEnoughSamples => DiscardReason::NotEnoughSamples, + ProfileError::NoTransactionAssociated => DiscardReason::NoTransactionAssociated, + ProfileError::InvalidTransactionMetadata => DiscardReason::InvalidTransactionMetadata, + ProfileError::MissingProfileMetadata => DiscardReason::InvalidProfileMetadata, + ProfileError::MalformedStacks => DiscardReason::MalformedStacks, + ProfileError::MalformedSamples => DiscardReason::MalformedSamples, + _ => DiscardReason::Unknown, + } +} diff --git a/relay-profiling/src/python.rs b/relay-profiling/src/python.rs index a40eb360382..3df011aca98 100644 --- a/relay-profiling/src/python.rs +++ b/relay-profiling/src/python.rs @@ -53,7 +53,7 @@ struct PythonProfile { pub fn parse_python_profile(payload: &[u8]) -> Result, ProfileError> { let profile: PythonProfile = - serde_json::from_slice(payload).map_err(ProfileError::InvalidJson)?; + serde_json::from_slice(payload).map_err(ProfileError::InvalidJSON)?; if profile.profile.samples.len() < 2 { return Err(ProfileError::NotEnoughSamples); diff --git a/relay-profiling/src/rust.rs b/relay-profiling/src/rust.rs index 721a4fb1ee4..3c88ba25b70 100644 --- a/relay-profiling/src/rust.rs +++ b/relay-profiling/src/rust.rs @@ -50,7 +50,7 @@ struct RustProfile { pub fn parse_rust_profile(payload: &[u8]) -> Result, ProfileError> { let profile: RustProfile = - serde_json::from_slice(payload).map_err(ProfileError::InvalidJson)?; + serde_json::from_slice(payload).map_err(ProfileError::InvalidJSON)?; if profile.sampled_profile.samples.is_empty() { return Err(ProfileError::NotEnoughSamples); diff --git a/relay-profiling/src/sample.rs b/relay-profiling/src/sample.rs index 582c3872621..62a0cd00999 100644 --- a/relay-profiling/src/sample.rs +++ b/relay-profiling/src/sample.rs @@ -42,7 +42,7 @@ impl Frame { #[derive(Debug, Serialize, Deserialize, Clone)] struct Sample { - stack_id: u32, + stack_id: usize, #[serde(deserialize_with = "deserialize_number_from_string")] thread_id: u64, #[serde(deserialize_with = "deserialize_number_from_string")] @@ -69,7 +69,7 @@ struct QueueMetadata { #[derive(Debug, Serialize, Deserialize, Clone)] struct Profile { samples: Vec, - stacks: Vec>, + stacks: Vec>, frames: Vec, #[serde(default, skip_serializing_if = "Option::is_none")] @@ -179,6 +179,26 @@ impl SampleProfile { } } + fn check_samples(&self) -> bool { + for sample in &self.profile.samples { + if self.profile.stacks.get(sample.stack_id).is_none() { + return false; + } + } + true + } + + fn check_stacks(&self) -> bool { + for stack in &self.profile.stacks { + for frame_id in stack { + if self.profile.frames.get(*frame_id).is_none() { + return false; + } + } + } + true + } + /// Removes a sample when it's the only sample on its thread fn remove_single_samples_per_thread(&mut self) { let mut sample_count_by_thread_id: HashMap = HashMap::new(); @@ -210,6 +230,14 @@ pub fn expand_sample_profile(payload: &[u8]) -> Result>, ProfileErro return Err(ProfileError::MissingProfileMetadata); } + if !profile.check_samples() { + return Err(ProfileError::MalformedSamples); + } + + if !profile.check_stacks() { + return Err(ProfileError::MalformedStacks); + } + let mut items: Vec> = Vec::new(); // As we're getting one profile for multiple transactions and our backend doesn't support this, @@ -233,6 +261,10 @@ pub fn expand_sample_profile(payload: &[u8]) -> Result>, ProfileErro } }); + if new_profile.profile.samples.is_empty() { + return Err(ProfileError::NotEnoughSamples); + } + match serde_json::to_vec(&new_profile) { Ok(payload) => items.push(payload), Err(_) => { @@ -246,7 +278,7 @@ pub fn expand_sample_profile(payload: &[u8]) -> Result>, ProfileErro fn parse_profile(payload: &[u8]) -> Result { let mut profile: SampleProfile = - serde_json::from_slice(payload).map_err(ProfileError::InvalidJson)?; + serde_json::from_slice(payload).map_err(ProfileError::InvalidJSON)?; profile .transactions @@ -409,6 +441,64 @@ mod tests { assert!(parse_profile(&payload[..]).is_err()); } + #[test] + fn test_expand_with_samples_outside_transaction() { + let mut profile = generate_profile(); + + profile.profile.frames.push(Frame { + abs_path: Some("".to_string()), + colno: Some(0), + filename: Some("".to_string()), + function: Some("".to_string()), + in_app: Some(false), + instruction_addr: Some(Addr(0)), + lineno: Some(0), + module: Some("".to_string()), + }); + profile.transactions.push(TransactionMetadata { + active_thread_id: 1, + id: EventId::new(), + name: "blah".to_string(), + relative_cpu_end_ms: 0, + relative_cpu_start_ms: 0, + relative_end_ns: 100, + relative_start_ns: 50, + trace_id: EventId::new(), + }); + profile.profile.stacks.push(vec![0]); + profile.profile.samples.extend(vec![ + Sample { + stack_id: 0, + queue_address: Some("0xdeadbeef".to_string()), + elapsed_since_start_ns: 10, + thread_id: 1, + }, + Sample { + stack_id: 0, + queue_address: Some("0xdeadbeef".to_string()), + elapsed_since_start_ns: 20, + thread_id: 1, + }, + Sample { + stack_id: 0, + queue_address: Some("0xdeadbeef".to_string()), + elapsed_since_start_ns: 30, + thread_id: 1, + }, + Sample { + stack_id: 0, + queue_address: Some("0xdeadbeef".to_string()), + elapsed_since_start_ns: 40, + thread_id: 1, + }, + ]); + + let payload = serde_json::to_vec(&profile).unwrap(); + let data = expand_sample_profile(&payload[..]); + + assert!(data.is_err()); + } + #[test] fn test_expand_multiple_transactions() { let payload = diff --git a/relay-profiling/src/typescript.rs b/relay-profiling/src/typescript.rs index 3b59364d782..91646e72095 100644 --- a/relay-profiling/src/typescript.rs +++ b/relay-profiling/src/typescript.rs @@ -33,7 +33,7 @@ struct TypescriptProfile { pub fn parse_typescript_profile(payload: &[u8]) -> Result, ProfileError> { let profile: TypescriptProfile = - serde_json::from_slice(payload).map_err(ProfileError::InvalidJson)?; + serde_json::from_slice(payload).map_err(ProfileError::InvalidJSON)?; if profile.profile.is_empty() { return Err(ProfileError::NotEnoughSamples); @@ -66,7 +66,7 @@ mod tests { } fn minimal_profile_from_json(data: &[u8]) -> Result { - serde_json::from_slice(data).map_err(ProfileError::InvalidJson) + serde_json::from_slice(data).map_err(ProfileError::InvalidJSON) } #[test] diff --git a/relay-server/src/actors/outcome.rs b/relay-server/src/actors/outcome.rs index 68a9f955d28..b4cd948c08d 100644 --- a/relay-server/src/actors/outcome.rs +++ b/relay-server/src/actors/outcome.rs @@ -356,15 +356,11 @@ pub enum DiscardReason { /// dynamic sampling rules. TransactionSampled, - /// (Relay) We failed to parse the profile so we discard the profile. - ProcessProfile, - - /// (Relay) The profile is parseable but semantically invalid. This could happen if - /// profiles lack sufficient samples. - InvalidProfile, - - // (Relay) We failed to parse the replay so we discard it. + /// (Relay) We failed to parse the replay so we discard it. InvalidReplayEvent, + + /// (Relay) Profiling related discard reasons + Profiling(relay_profiling::DiscardReason), } impl DiscardReason { @@ -385,7 +381,6 @@ impl DiscardReason { DiscardReason::SecurityReport => "security_report", DiscardReason::Cors => "cors", DiscardReason::ProcessUnreal => "process_unreal", - DiscardReason::ProcessProfile => "process_profile", // Relay specific reasons (not present in Sentry) DiscardReason::Payload => "payload", @@ -403,8 +398,8 @@ impl DiscardReason { DiscardReason::Internal => "internal", DiscardReason::TransactionSampled => "transaction_sampled", DiscardReason::EmptyEnvelope => "empty_envelope", - DiscardReason::InvalidProfile => "invalid_profile", DiscardReason::InvalidReplayEvent => "invalid_replay", + DiscardReason::Profiling(reason) => reason.name(), } } } diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 77f766aa02c..0a25583ad3b 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -349,13 +349,107 @@ fn outcome_from_parts(field: ClientReportField, reason: &str) -> Result Outcome { - let discard_reason = match err { - relay_profiling::ProfileError::CannotSerializePayload => DiscardReason::Internal, - relay_profiling::ProfileError::NotEnoughSamples => DiscardReason::InvalidProfile, - _ => DiscardReason::ProcessProfile, +fn track_sampling_metrics( + project_state: &ProjectState, + context: &DynamicSamplingContext, + event: &Event, +) { + // We only collect this metric for the root transaction event, so ignore secondary projects. + if !project_state.is_matching_key(context.public_key) { + return; + } + + let transaction_info = match event.transaction_info.value() { + Some(info) => info, + None => return, }; - Outcome::Invalid(discard_reason) + + let changes = match transaction_info.changes.value() { + Some(value) => value.as_slice(), + None => return, + }; + + let last_change = changes + .iter() + .rev() + // skip all broken change records + .filter_map(|a| a.value()) + // skip records without a timestamp + .filter(|c| c.timestamp.value().is_some()) + // take the last that did not occur when the event was sent + .find(|c| c.timestamp.value() != event.timestamp.value()); + + let source = event.get_transaction_source().as_str(); + let platform = event.platform.as_str().unwrap_or("other"); + let sdk_name = event.sdk_name(); + let sdk_version = event.sdk_version(); + + metric!( + histogram(RelayHistograms::DynamicSamplingChanges) = changes.len() as u64, + source = source, + platform = platform, + sdk_name = sdk_name, + sdk_version = sdk_version, + ); + + if let Some(&total) = transaction_info.propagations.value() { + // If there was no change, there were no propagations that happened with a wrong name. + let change = last_change + .and_then(|c| c.propagations.value()) + .map_or(0, |v| *v); + + metric!( + histogram(RelayHistograms::DynamicSamplingPropagationCount) = change, + source = source, + platform = platform, + sdk_name = sdk_name, + sdk_version = sdk_version, + ); + + let percentage = match (change, total) { + (0, 0) => 0.0, // 0% indicates no premature changes. + _ => ((change as f64) / (total as f64)).min(1.0) * 100.0, + }; + + metric!( + histogram(RelayHistograms::DynamicSamplingPropagationPercentage) = percentage, + source = source, + platform = platform, + sdk_name = sdk_name, + sdk_version = sdk_version, + ); + } + + if let (Some(&start), Some(&change), Some(&end)) = ( + event.start_timestamp.value(), + last_change + .and_then(|c| c.timestamp.value()) + .or_else(|| event.start_timestamp.value()), // default to start if there was no change + event.timestamp.value(), + ) { + let delay_ms = (change - start).num_milliseconds(); + if delay_ms >= 0 { + metric!( + histogram(RelayHistograms::DynamicSamplingChangeDuration) = delay_ms as u64, + source = source, + platform = platform, + sdk_name = sdk_name, + sdk_version = sdk_version, + ); + } + + let duration_ms = (end - start).num_milliseconds() as f64; + if delay_ms >= 0 && duration_ms >= 0.0 { + let percentage = ((delay_ms as f64) / duration_ms).min(1.0) * 100.0; + metric!( + histogram(RelayHistograms::DynamicSamplingChangePercentage) = percentage, + source = source, + platform = platform, + sdk_name = sdk_name, + sdk_version = sdk_version, + ); + } + } } /// Response of the [`ProcessEnvelope`] message. @@ -971,9 +1065,16 @@ impl EnvelopeProcessorService { match relay_profiling::expand_profile(&item.payload()[..]) { Ok(payloads) => new_profiles.extend(payloads), Err(err) => { - relay_log::debug!("invalid profile: {:#?}", err); + match err { + relay_profiling::ProfileError::InvalidJSON(_) => { + relay_log::error!("invalid profile: {}", LogError(&err)); + } + _ => relay_log::debug!("invalid profile: {}", err), + }; context.track_outcome( - outcome_from_profile_error(err), + Outcome::Invalid(DiscardReason::Profiling( + relay_profiling::discard_reason(err), + )), DataCategory::Profile, 1, );