From e77583827aedb3a4df491d38c9a98cb26dc6ca0b Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 31 May 2023 15:51:48 -0700 Subject: [PATCH 01/72] feat(replay): combined envelope items --- relay-dynamic-config/src/feature.rs | 6 ++ relay-server/src/actors/processor.rs | 128 ++++++++++++++++++++++++++ relay-server/src/actors/store.rs | 49 ++++++++++ relay-server/src/envelope.rs | 12 ++- relay-server/src/utils/rate_limits.rs | 1 + relay-server/src/utils/sizes.rs | 3 + 6 files changed, 198 insertions(+), 1 deletion(-) diff --git a/relay-dynamic-config/src/feature.rs b/relay-dynamic-config/src/feature.rs index 9a0006d9b0..c2bca0d317 100644 --- a/relay-dynamic-config/src/feature.rs +++ b/relay-dynamic-config/src/feature.rs @@ -9,6 +9,9 @@ pub enum Feature { SessionReplay, /// Enables data scrubbing of replay recording payloads. SessionReplayRecordingScrubbing, + /// Enables combining session replay envelope item (Replay Recordings and Replay Events). + /// into one item. + SessionReplayCombinedEnvelopeItems, /// Enables device.class synthesis /// /// Enables device.class tag synthesis on mobile events. 
@@ -47,6 +50,9 @@ impl Serialize for Feature { Feature::SessionReplayRecordingScrubbing => { "organizations:session-replay-recording-scrubbing" } + Feature::SessionReplayCombinedEnvelopeItems => { + "organizations:session-replay-combined-envelope-items" + } Feature::DeviceClassSynthesis => "organizations:device-class-synthesis", Feature::SpanMetricsExtraction => "projects:span-metrics-extraction", Feature::Unknown(s) => s, diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 50c5d1cfef..ec1ea1a844 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -1215,6 +1215,8 @@ impl EnvelopeProcessorService { let project_state = &state.project_state; let replays_enabled = project_state.has_feature(Feature::SessionReplay); let scrubbing_enabled = project_state.has_feature(Feature::SessionReplayRecordingScrubbing); + let combined_envelope_items = + project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); let meta = state.envelope().meta().clone(); let client_addr = meta.client_addr(); @@ -1301,6 +1303,36 @@ impl EnvelopeProcessorService { _ => ItemAction::Keep, }); + if combined_envelope_items { + // If this flag is enabled, combine both items into a single item, + // and remove the original items. + // The combined Item's payload is a MsgPack map with the keys + // "replay_event" and "replay_recording". + // The values are the original payloads of the items. 
+ let envelope = &mut state.envelope_mut(); + if let Some(replay_event_item) = + envelope.take_item_by(|item| item.ty() == &ItemType::ReplayEvent) + { + if let Some(replay_recording_item) = + envelope.take_item_by(|item| item.ty() == &ItemType::ReplayRecording) + { + let mut data = Vec::new(); + let mut combined_item_payload = BTreeMap::new(); + combined_item_payload.insert("replay_event", replay_event_item.payload()); + combined_item_payload + .insert("replay_recording", replay_recording_item.payload()); + rmp_serde::encode::write(&mut data, &combined_item_payload) + .expect("write msgpack"); + + let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); + combined_item.set_payload(ContentType::MsgPack, data); + envelope.add_item(combined_item); + } else { + envelope.add_item(replay_event_item) + } + } + } + Ok(()) } @@ -1665,6 +1697,7 @@ impl EnvelopeProcessorService { ItemType::Profile => false, ItemType::ReplayEvent => false, ItemType::ReplayRecording => false, + ItemType::CombinedReplayEventAndRecording => false, ItemType::CheckIn => false, // Without knowing more, `Unknown` items are allowed to be repeated @@ -3178,6 +3211,101 @@ mod tests { assert_eq!(new_envelope.items().next().unwrap().ty(), &ItemType::Event); } + #[tokio::test] + async fn test_replays_combined_payload() { + let processor = create_test_processor(Default::default()); + let (outcome_aggregator, test_store) = services(); + let event_id = protocol::EventId::new(); + + let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" + .parse() + .unwrap(); + + let request_meta = RequestMeta::new(dsn); + let mut envelope = Envelope::from_request(Some(event_id), request_meta); + + envelope.add_item({ + let mut item = Item::new(ItemType::ReplayRecording); + item.set_payload(ContentType::OctetStream, r###"{"foo": "bar"}"###); + item + }); + + envelope.add_item({ + let mut item = Item::new(ItemType::ReplayEvent); + item.set_payload(ContentType::Json, r#"{ + "type": 
"replay_event", + "replay_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "replay_type": "session", + "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "segment_id": 0, + "timestamp": 1597977777.6189718, + "replay_start_timestamp": 1597976392.6542819, + "urls": ["sentry.io"], + "error_ids": ["1", "2"], + "trace_ids": ["3", "4"], + "dist": "1.12", + "platform": "javascript", + "environment": "production", + "release": 42, + "tags": { + "transaction": "/organizations/:orgId/performance/:eventSlug/" + }, + "sdk": { + "name": "name", + "version": "veresion" + }, + "user": { + "id": "123", + "username": "user", + "email": "user@site.com", + "ip_address": "192.168.11.12" + }, + "request": { + "url": null, + "headers": { + "user-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" + } + }, + "contexts": { + "trace": { + "trace_id": "4C79F60C11214EB38604F4AE0781BFB2", + "span_id": "FA90FDEAD5F74052", + "type": "trace" + }, + "replay": { + "error_sample_rate": 0.125, + "session_sample_rate": 0.5 + } + } + }"#); + item + }); + + let mut project_state = ProjectState::allowed(); + project_state.config.features.insert(Feature::SessionReplay); + project_state + .config + .features + .insert(Feature::SessionReplayCombinedEnvelopeItems); + + let message = ProcessEnvelope { + envelope: ManagedEnvelope::standalone(envelope, outcome_aggregator, test_store), + project_state: Arc::new(project_state), + sampling_project_state: None, + }; + + let envelope_response = processor.process(message).unwrap(); + let ctx = envelope_response.envelope.unwrap(); + let new_envelope = ctx.envelope(); + + assert_eq!(new_envelope.len(), 1); + + assert_eq!( + new_envelope.items().next().unwrap().ty(), + &ItemType::CombinedReplayEventAndRecording + ); + } + fn process_envelope_with_root_project_state( envelope: Box, sampling_project_state: Option>, diff --git a/relay-server/src/actors/store.rs b/relay-server/src/actors/store.rs index 
ad032add81..c33cf3e605 100644 --- a/relay-server/src/actors/store.rs +++ b/relay-server/src/actors/store.rs @@ -194,6 +194,14 @@ impl StoreService { retention, item, )?, + ItemType::CombinedReplayEventAndRecording => self + .produce_combined_replay_event_and_recording( + event_id.ok_or(StoreError::NoEventId)?, + scoping.organization_id, + scoping.project_id, + retention, + item, + )?, ItemType::CheckIn => self.produce_check_in( scoping.organization_id, scoping.project_id, @@ -781,6 +789,31 @@ impl StoreService { }) } + fn produce_combined_replay_event_and_recording( + &self, + replay_id: EventId, + organization_id: u64, + project_id: ProjectId, + retention_days: u16, + item: &Item, + ) -> Result<(), StoreError> { + let message = KafkaMessage::CombinedReplayEventAndRecording( + CombinedReplayEventAndRecordingKafkaMessage { + replay_id, + project_id, + retention_days, + payload: item.payload(), + }, + ); + + self.produce(KafkaTopic::ReplayRecordings, organization_id, message)?; + + metric!( + counter(RelayCounters::ProcessingMessageProduced) += 1, + event_type = "combined_replay_event_and_recording" + ); + Ok(()) + } fn produce_check_in( &self, organization_id: u64, @@ -952,6 +985,17 @@ struct ReplayRecordingChunkKafkaMessage { /// the tuple (id, chunk_index) is the unique identifier for a single chunk. chunk_index: usize, } +#[derive(Debug, Serialize)] +struct CombinedReplayEventAndRecordingKafkaMessage { + /// Raw event payload. + payload: Bytes, + /// The event id. + replay_id: EventId, + /// The project id for the current event. + project_id: ProjectId, + // Number of days to retain. 
+ retention_days: u16, +} #[derive(Debug, Serialize)] struct ReplayRecordingChunkMeta { @@ -1081,6 +1125,7 @@ enum KafkaMessage { ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage), ReplayRecording(ReplayRecordingKafkaMessage), ReplayRecordingChunk(ReplayRecordingChunkKafkaMessage), + CombinedReplayEventAndRecording(CombinedReplayEventAndRecordingKafkaMessage), CheckIn(CheckInKafkaMessage), } @@ -1098,6 +1143,9 @@ impl Message for KafkaMessage { KafkaMessage::ReplayRecording(_) => "replay_recording", KafkaMessage::ReplayRecordingChunk(_) => "replay_recording_chunk", KafkaMessage::ReplayRecordingNotChunked(_) => "replay_recording_not_chunked", + KafkaMessage::CombinedReplayEventAndRecording(_) => { + "combined_replay_event_and_recording" + } KafkaMessage::CheckIn(_) => "check_in", } } @@ -1116,6 +1164,7 @@ impl Message for KafkaMessage { Self::ReplayRecording(message) => message.replay_id.0, Self::ReplayRecordingChunk(message) => message.replay_id.0, Self::ReplayRecordingNotChunked(_message) => Uuid::nil(), // Ensure random partitioning. + Self::CombinedReplayEventAndRecording(_message) => Uuid::nil(), // Ensure random partitioning. Self::CheckIn(_message) => Uuid::nil(), }; diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index aa2260c175..ca0acad466 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -107,6 +107,8 @@ pub enum ItemType { ReplayEvent, /// Replay Recording data. ReplayRecording, + /// Combined Replay metadata and Recording Payload + CombinedReplayEventAndRecording, /// Monitor check-in encoded as JSON. CheckIn, /// A new item type that is yet unknown by this version of Relay. 
@@ -150,6 +152,9 @@ impl fmt::Display for ItemType { Self::Profile => write!(f, "profile"), Self::ReplayEvent => write!(f, "replay_event"), Self::ReplayRecording => write!(f, "replay_recording"), + Self::CombinedReplayEventAndRecording => { + write!(f, "combined_replay_event_and_recording") + } Self::CheckIn => write!(f, "check_in"), Self::Unknown(s) => s.fmt(f), } @@ -565,7 +570,10 @@ impl Item { } else { DataCategory::Profile }), - ItemType::ReplayEvent | ItemType::ReplayRecording => Some(DataCategory::Replay), + ItemType::ReplayEvent + | ItemType::ReplayRecording + | ItemType::CombinedReplayEventAndRecording => Some(DataCategory::Replay), + ItemType::ClientReport => None, ItemType::CheckIn => Some(DataCategory::Monitor), ItemType::Unknown(_) => None, @@ -746,6 +754,7 @@ impl Item { | ItemType::ClientReport | ItemType::ReplayEvent | ItemType::ReplayRecording + | ItemType::CombinedReplayEventAndRecording | ItemType::Profile | ItemType::CheckIn => false, @@ -775,6 +784,7 @@ impl Item { ItemType::MetricBuckets => false, ItemType::ClientReport => false, ItemType::ReplayRecording => false, + ItemType::CombinedReplayEventAndRecording => false, ItemType::Profile => true, ItemType::CheckIn => false, diff --git a/relay-server/src/utils/rate_limits.rs b/relay-server/src/utils/rate_limits.rs index 167f1ea24f..c5ec662eec 100644 --- a/relay-server/src/utils/rate_limits.rs +++ b/relay-server/src/utils/rate_limits.rs @@ -106,6 +106,7 @@ fn infer_event_category(item: &Item) -> Option { ItemType::Profile => None, ItemType::ReplayEvent => None, ItemType::ReplayRecording => None, + ItemType::CombinedReplayEventAndRecording => None, ItemType::ClientReport => None, ItemType::CheckIn => None, ItemType::Unknown(_) => None, diff --git a/relay-server/src/utils/sizes.rs b/relay-server/src/utils/sizes.rs index 39e4156b88..1b62946b37 100644 --- a/relay-server/src/utils/sizes.rs +++ b/relay-server/src/utils/sizes.rs @@ -63,6 +63,9 @@ pub fn check_envelope_size_limits(config: &Config, 
envelope: &Envelope) -> bool ItemType::UserReport => (), ItemType::Metrics => (), ItemType::MetricBuckets => (), + // The Combined Replay Envelope isn't generated on the client so its size does not need + // to be checked. + ItemType::CombinedReplayEventAndRecording => (), ItemType::Unknown(_) => (), } } From dc26a9db9bb8f74a359592b364a460a54de8cd6a Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 31 May 2023 16:19:40 -0700 Subject: [PATCH 02/72] only run in processing mode --- relay-server/src/actors/processor.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index ec1ea1a844..b00bcd94ab 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -1215,8 +1215,6 @@ impl EnvelopeProcessorService { let project_state = &state.project_state; let replays_enabled = project_state.has_feature(Feature::SessionReplay); let scrubbing_enabled = project_state.has_feature(Feature::SessionReplayRecordingScrubbing); - let combined_envelope_items = - project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); let meta = state.envelope().meta().clone(); let client_addr = meta.client_addr(); @@ -1303,6 +1301,18 @@ impl EnvelopeProcessorService { _ => ItemAction::Keep, }); + Ok(()) + } + + #[cfg(feature = "processing")] + fn process_replays_combine_items( + &self, + state: &mut ProcessEnvelopeState, + ) -> Result<(), ProcessingError> { + let project_state = &state.project_state; + let combined_envelope_items = + project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); + if combined_envelope_items { // If this flag is enabled, combine both items into a single item, // and remove the original items. 
@@ -1321,10 +1331,9 @@ impl EnvelopeProcessorService { combined_item_payload.insert("replay_event", replay_event_item.payload()); combined_item_payload .insert("replay_recording", replay_recording_item.payload()); - rmp_serde::encode::write(&mut data, &combined_item_payload) - .expect("write msgpack"); - + rmp_serde::encode::write(&mut data, &combined_item_payload).expect("msg"); let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); + combined_item.set_payload(ContentType::MsgPack, data); envelope.add_item(combined_item); } else { @@ -1332,7 +1341,6 @@ impl EnvelopeProcessorService { } } } - Ok(()) } @@ -2459,6 +2467,8 @@ impl EnvelopeProcessorService { self.process_client_reports(state); self.process_user_reports(state); self.process_replays(state)?; + if_processing!({ self.process_replays_combine_items(state)? }); + self.filter_profiles(state); // After filtering, we need to update the envelope summary: @@ -3212,6 +3222,7 @@ mod tests { } #[tokio::test] + #[cfg(feature = "processing")] async fn test_replays_combined_payload() { let processor = create_test_processor(Default::default()); let (outcome_aggregator, test_store) = services(); From 7033ae0abecc99034b660fc1c57134e7e2e1e96f Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Tue, 6 Jun 2023 17:18:44 -0700 Subject: [PATCH 03/72] add integration test. 
fix processing mode stuff, add store logic --- relay-dynamic-config/src/feature.rs | 3 + relay-general/src/protocol/replay.rs | 1 - relay-replays/src/recording.rs | 1 + relay-server/src/actors/processor.rs | 4 +- relay-server/src/actors/store.rs | 29 +++-- relay-server/src/envelope.rs | 3 +- tests/integration/fixtures/processing.py | 4 +- .../test_replay_combined_payload.py | 100 ++++++++++++++++++ tests/integration/test_replay_events.py | 8 +- 9 files changed, 135 insertions(+), 18 deletions(-) create mode 100644 tests/integration/test_replay_combined_payload.py diff --git a/relay-dynamic-config/src/feature.rs b/relay-dynamic-config/src/feature.rs index c2bca0d317..8720b63fe1 100644 --- a/relay-dynamic-config/src/feature.rs +++ b/relay-dynamic-config/src/feature.rs @@ -30,6 +30,9 @@ impl<'de> Deserialize<'de> for Feature { let feature_name = Cow::::deserialize(deserializer)?; Ok(match feature_name.as_ref() { "organizations:session-replay" => Feature::SessionReplay, + "organizations:session-replay-combined-envelope-items" => { + Feature::SessionReplayCombinedEnvelopeItems + } "organizations:session-replay-recording-scrubbing" => { Feature::SessionReplayRecordingScrubbing } diff --git a/relay-general/src/protocol/replay.rs b/relay-general/src/protocol/replay.rs index c1f5f0f200..29bdc86eda 100644 --- a/relay-general/src/protocol/replay.rs +++ b/relay-general/src/protocol/replay.rs @@ -288,7 +288,6 @@ impl Replay { } else { &user_agent_info }; - let contexts = self.contexts.get_or_insert_with(|| Contexts::new()); user_agent::normalize_user_agent_info_generic(contexts, &self.platform, user_agent_info); } diff --git a/relay-replays/src/recording.rs b/relay-replays/src/recording.rs index a8d684bd17..c7f8046400 100644 --- a/relay-replays/src/recording.rs +++ b/relay-replays/src/recording.rs @@ -11,6 +11,7 @@ //! identified by `type: 5`. The scrubber skips all other node types and does not perform any //! validation beyond JSON parsing. 
+// use bytes::Bytes; use std::borrow::Cow; use std::cell::RefCell; use std::fmt; diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index b00bcd94ab..34ae1e5c7f 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -1328,10 +1328,12 @@ impl EnvelopeProcessorService { { let mut data = Vec::new(); let mut combined_item_payload = BTreeMap::new(); + combined_item_payload.insert("replay_event", replay_event_item.payload()); combined_item_payload .insert("replay_recording", replay_recording_item.payload()); rmp_serde::encode::write(&mut data, &combined_item_payload).expect("msg"); + let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); combined_item.set_payload(ContentType::MsgPack, data); @@ -2466,7 +2468,7 @@ impl EnvelopeProcessorService { self.process_sessions(state); self.process_client_reports(state); self.process_user_reports(state); - self.process_replays(state)?; + self.process_replays(state); if_processing!({ self.process_replays_combine_items(state)? 
}); self.filter_profiles(state); diff --git a/relay-server/src/actors/store.rs b/relay-server/src/actors/store.rs index c33cf3e605..e14b60f220 100644 --- a/relay-server/src/actors/store.rs +++ b/relay-server/src/actors/store.rs @@ -199,7 +199,9 @@ impl StoreService { event_id.ok_or(StoreError::NoEventId)?, scoping.organization_id, scoping.project_id, + scoping.key_id, retention, + start_time, item, )?, ItemType::CheckIn => self.produce_check_in( @@ -692,6 +694,7 @@ impl StoreService { received: UnixTimestamp::from_instant(start_time).as_secs(), retention_days: retention, payload: item.payload(), + version: None, }); self.produce( @@ -794,23 +797,28 @@ impl StoreService { replay_id: EventId, organization_id: u64, project_id: ProjectId, + key_id: Option, retention_days: u16, + start_time: Instant, item: &Item, ) -> Result<(), StoreError> { - let message = KafkaMessage::CombinedReplayEventAndRecording( - CombinedReplayEventAndRecordingKafkaMessage { + let message = + KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { replay_id, project_id, + org_id: organization_id, + key_id, retention_days, + received: UnixTimestamp::from_instant(start_time).as_secs(), + version: Some(1), payload: item.payload(), - }, - ); + }); self.produce(KafkaTopic::ReplayRecordings, organization_id, message)?; metric!( counter(RelayCounters::ProcessingMessageProduced) += 1, - event_type = "combined_replay_event_and_recording" + event_type = "replay_recording_not_chunked" ); Ok(()) } @@ -993,6 +1001,11 @@ struct CombinedReplayEventAndRecordingKafkaMessage { replay_id: EventId, /// The project id for the current event. project_id: ProjectId, + /// The project id for the current event. + org_id: u64, + /// The timestamp of when the recording was Received by relay + received: u64, + version: u8, // Number of days to retain. 
retention_days: u16, } @@ -1037,6 +1050,7 @@ struct ReplayRecordingNotChunkedKafkaMessage { project_id: ProjectId, received: u64, retention_days: u16, + version: Option, payload: Bytes, } @@ -1125,7 +1139,6 @@ enum KafkaMessage { ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage), ReplayRecording(ReplayRecordingKafkaMessage), ReplayRecordingChunk(ReplayRecordingChunkKafkaMessage), - CombinedReplayEventAndRecording(CombinedReplayEventAndRecordingKafkaMessage), CheckIn(CheckInKafkaMessage), } @@ -1143,9 +1156,6 @@ impl Message for KafkaMessage { KafkaMessage::ReplayRecording(_) => "replay_recording", KafkaMessage::ReplayRecordingChunk(_) => "replay_recording_chunk", KafkaMessage::ReplayRecordingNotChunked(_) => "replay_recording_not_chunked", - KafkaMessage::CombinedReplayEventAndRecording(_) => { - "combined_replay_event_and_recording" - } KafkaMessage::CheckIn(_) => "check_in", } } @@ -1164,7 +1174,6 @@ impl Message for KafkaMessage { Self::ReplayRecording(message) => message.replay_id.0, Self::ReplayRecordingChunk(message) => message.replay_id.0, Self::ReplayRecordingNotChunked(_message) => Uuid::nil(), // Ensure random partitioning. - Self::CombinedReplayEventAndRecording(_message) => Uuid::nil(), // Ensure random partitioning. Self::CheckIn(_message) => Uuid::nil(), }; diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index ca0acad466..d64bc3b10b 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -108,6 +108,7 @@ pub enum ItemType { /// Replay Recording data. ReplayRecording, /// Combined Replay metadata and Recording Payload + #[cfg(feature = "processing")] CombinedReplayEventAndRecording, /// Monitor check-in encoded as JSON. 
CheckIn, @@ -777,7 +778,7 @@ impl Item { ItemType::RawSecurity => true, ItemType::UnrealReport => true, ItemType::UserReport => true, - ItemType::ReplayEvent => true, + ItemType::ReplayEvent => false, ItemType::Session => false, ItemType::Sessions => false, ItemType::Metrics => false, diff --git a/tests/integration/fixtures/processing.py b/tests/integration/fixtures/processing.py index 2660db1b3d..af259e4665 100644 --- a/tests/integration/fixtures/processing.py +++ b/tests/integration/fixtures/processing.py @@ -283,7 +283,9 @@ def metrics_consumer(kafka_consumer): @pytest.fixture def replay_recordings_consumer(kafka_consumer): - return lambda: ReplayRecordingsConsumer(*kafka_consumer("replay_recordings")) + return lambda timeout=None: ReplayRecordingsConsumer( + timeout=timeout, *kafka_consumer("replay_recordings") + ) @pytest.fixture diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py new file mode 100644 index 0000000000..99070a6d7e --- /dev/null +++ b/tests/integration/test_replay_combined_payload.py @@ -0,0 +1,100 @@ +# import pytest +# import zlib + +from sentry_sdk.envelope import Envelope, Item, PayloadRef +import msgpack +from .test_replay_events import generate_replay_sdk_event +import json + +# def test_replay_recordings(mini_sentry, relay_chain): +# relay = relay_chain(min_relay_version="latest") + +# project_id = 42 +# mini_sentry.add_basic_project_config( +# project_id, extra={"config": {"features": ["organizations:session-replay"]}} +# ) + +# replay_id = "515539018c9b4260a6f999572f1661ee" + +# replay_event = generate_replay_sdk_event(replay_id=replay_id) + +# envelope = Envelope(headers=[["event_id", replay_id]]) +# envelope.add_item( +# Item(payload=PayloadRef(bytes=b"{}\n[]"), type="replay_recording") +# ) +# envelope.add_item(Item(payload=PayloadRef(json=replay_event), type="replay_event")) + +# relay.send_envelope(project_id, envelope) + +# envelope = 
mini_sentry.captured_events.get(timeout=1) +# assert len(envelope.items) == 1 + +# replay_combined_item = envelope.items[0] +# assert session_item.type == "replay_recording" + +# replay_recording = session_item.get_bytes() +# assert replay_recording.startswith(b"{}\n") # The body is compressed + + +def test_replay_combined_with_processing( + mini_sentry, relay_with_processing, replay_recordings_consumer +): + replay_recording_bytes = b"{}\n[]" + relay = relay_with_processing() + replay_recordings_consumer = replay_recordings_consumer(timeout=10) + + mini_sentry.add_basic_project_config( + 42, + extra={ + "config": { + "features": [ + "organizations:session-replay", + "organizations:session-replay-combined-envelope-items", + ] + } + }, + ) + + replay_id = "515539018c9b4260a6f999572f1661ee" + + replay_event = generate_replay_sdk_event(replay_id=replay_id) + + envelope = Envelope(headers=[["event_id", replay_id]]) + envelope.add_item( + Item(payload=PayloadRef(bytes=replay_recording_bytes), type="replay_recording") + ) + envelope.add_item(Item(payload=PayloadRef(json=replay_event), type="replay_event")) + + relay.send_envelope(42, envelope) + + combined_replay_message = replay_recordings_consumer.get_not_chunked_replay() + assert combined_replay_message["type"] == "replay_recording_not_chunked" + assert combined_replay_message["replay_id"] == "515539018c9b4260a6f999572f1661ee" + assert combined_replay_message["version"] == 1 + + payload = msgpack.unpackb(combined_replay_message["payload"]) + + replay_event = json.loads(payload["replay_event"]) + assert replay_event["replay_id"] == "515539018c9b4260a6f999572f1661ee" + + assert payload["replay_recording"] == replay_recording_bytes + # breakpoint() + + +# def test_replay_events_without_processing(mini_sentry, relay_chain): +# relay = relay_chain(min_relay_version="latest") + +# project_id = 42 +# mini_sentry.add_basic_project_config( +# project_id, extra={"config": {"features": ["organizations:session-replay"]}} +# ) + 
+# replay_item = generate_replay_sdk_event() + +# relay.send_replay_event(42, replay_item) + +# envelope = mini_sentry.captured_events.get(timeout=20) +# assert len(envelope.items) == 1 + +# replay_event = envelope.items[0] +# assert replay_event.type == "replay_event" diff --git a/tests/integration/test_replay_events.py b/tests/integration/test_replay_events.py index 17676206be..14d8f35da5 100644 --- a/tests/integration/test_replay_events.py +++ b/tests/integration/test_replay_events.py @@ -1,12 +1,12 @@ import json -def generate_replay_sdk_event(): +def generate_replay_sdk_event(replay_id="d2132d31b39445f1938d7e21b6bf0ec4"): return { "type": "replay_event", - "replay_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "replay_id": replay_id, "replay_type": "session", - "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "event_id": replay_id, "segment_id": 0, "timestamp": 1597977777.6189718, "replay_start_timestamp": 1597976392.6542819, @@ -28,7 +28,7 @@ def generate_replay_sdk_event(): "request": { "url": None, "headers": { - "user-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" }, }, "contexts": { From 2c9d2948ab9e3de2ad2dac69a4927f2071b79fdc Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Tue, 6 Jun 2023 17:36:01 -0700 Subject: [PATCH 04/72] remove processing mode business, use vec instead of Bytes --- relay-server/src/actors/processor.rs | 10 +++++----- relay-server/src/envelope.rs | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index 34ae1e5c7f..e150947e6b 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -1304,7 +1304,6 @@ impl EnvelopeProcessorService { Ok(()) } - #[cfg(feature = "processing")] fn 
process_replays_combine_items( &self, state: &mut ProcessEnvelopeState, @@ -1329,9 +1328,10 @@ impl EnvelopeProcessorService { let mut data = Vec::new(); let mut combined_item_payload = BTreeMap::new(); - combined_item_payload.insert("replay_event", replay_event_item.payload()); combined_item_payload - .insert("replay_recording", replay_recording_item.payload()); + .insert("replay_event", replay_event_item.payload().to_vec()); + combined_item_payload + .insert("replay_recording", replay_recording_item.payload().to_vec()); rmp_serde::encode::write(&mut data, &combined_item_payload).expect("msg"); let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); @@ -2468,8 +2468,8 @@ impl EnvelopeProcessorService { self.process_sessions(state); self.process_client_reports(state); self.process_user_reports(state); - self.process_replays(state); - if_processing!({ self.process_replays_combine_items(state)? }); + self.process_replays(state)?; + self.process_replays_combine_items(state)?; self.filter_profiles(state); diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index d64bc3b10b..f2d0e4fa3e 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -108,7 +108,6 @@ pub enum ItemType { /// Replay Recording data. ReplayRecording, /// Combined Replay metadata and Recording Payload - #[cfg(feature = "processing")] CombinedReplayEventAndRecording, /// Monitor check-in encoded as JSON. 
CheckIn, From aa0c0ce96fa5a68ab024cda4c86fec41169944ab Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Tue, 6 Jun 2023 17:43:32 -0700 Subject: [PATCH 05/72] fix args in combined store func --- relay-server/src/actors/store.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/relay-server/src/actors/store.rs b/relay-server/src/actors/store.rs index e14b60f220..f4227671e6 100644 --- a/relay-server/src/actors/store.rs +++ b/relay-server/src/actors/store.rs @@ -197,9 +197,7 @@ impl StoreService { ItemType::CombinedReplayEventAndRecording => self .produce_combined_replay_event_and_recording( event_id.ok_or(StoreError::NoEventId)?, - scoping.organization_id, - scoping.project_id, - scoping.key_id, + scoping, retention, start_time, item, @@ -795,9 +793,7 @@ impl StoreService { fn produce_combined_replay_event_and_recording( &self, replay_id: EventId, - organization_id: u64, - project_id: ProjectId, - key_id: Option, + scoping: Scoping, retention_days: u16, start_time: Instant, item: &Item, @@ -805,16 +801,20 @@ impl StoreService { let message = KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { replay_id, - project_id, - org_id: organization_id, - key_id, + project_id: scoping.project_id, + org_id: scoping.organization_id, + key_id: scoping.key_id, retention_days, received: UnixTimestamp::from_instant(start_time).as_secs(), version: Some(1), payload: item.payload(), }); - self.produce(KafkaTopic::ReplayRecordings, organization_id, message)?; + self.produce( + KafkaTopic::ReplayRecordings, + scoping.organization_id, + message, + )?; metric!( counter(RelayCounters::ProcessingMessageProduced) += 1, From 9d21e80323f706359a020257f827c123cfae675c Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Tue, 6 Jun 2023 18:08:10 -0700 Subject: [PATCH 06/72] fix tests --- relay-replays/src/recording.rs | 1 - relay-server/src/actors/processor.rs | 11 +++++-- tests/integration/fixtures/__init__.py | 9 ++++- 
.../test_replay_combined_payload.py | 33 +++++++++++++++---- tests/integration/test_replay_events.py | 2 +- 5 files changed, 44 insertions(+), 12 deletions(-) diff --git a/relay-replays/src/recording.rs b/relay-replays/src/recording.rs index c7f8046400..a8d684bd17 100644 --- a/relay-replays/src/recording.rs +++ b/relay-replays/src/recording.rs @@ -11,7 +11,6 @@ //! identified by `type: 5`. The scrubber skips all other node types and does not perform any //! validation beyond JSON parsing. -// use bytes::Bytes; use std::borrow::Cow; use std::cell::RefCell; use std::fmt; diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs index e150947e6b..2050243bc7 100644 --- a/relay-server/src/actors/processor.rs +++ b/relay-server/src/actors/processor.rs @@ -1332,7 +1332,14 @@ impl EnvelopeProcessorService { .insert("replay_event", replay_event_item.payload().to_vec()); combined_item_payload .insert("replay_recording", replay_recording_item.payload().to_vec()); - rmp_serde::encode::write(&mut data, &combined_item_payload).expect("msg"); + + if let Err(e) = rmp_serde::encode::write(&mut data, &combined_item_payload) { + relay_log::error!( + "failed to serialize combined replay event and recording: {}", + e + ); + // TODO: what to do here? Drop + emit outcome? 
+ } let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); @@ -3276,7 +3283,7 @@ mod tests { "request": { "url": null, "headers": { - "user-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" } }, "contexts": { diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index abfba75728..e17fb604cb 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -204,7 +204,14 @@ def send_transaction( self.send_envelope(project_id, envelope) def send_replay_event(self, project_id, payload, item_headers=None): - envelope = Envelope() + envelope = Envelope( + headers=[ + [ + "event_id", + payload["replay_id"], + ] + ] + ) envelope.add_item(Item(payload=PayloadRef(json=payload), type="replay_event")) if envelope.headers is None: envelope.headers = {} diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index 99070a6d7e..30a1c6eac5 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -78,23 +78,42 @@ def test_replay_combined_with_processing( assert replay_event["replay_id"] == "515539018c9b4260a6f999572f1661ee" assert payload["replay_recording"] == replay_recording_bytes - # breakpoint() -# def test_replay_events_without_processing(mini_sentry, relay_chain): +# TODO: figure out behavior for this test +# def test_replay_combined_without_processing( +# mini_sentry, relay_chain, replay_recordings_consumer +# ): # relay = relay_chain(min_relay_version="latest") +# replay_recordings_consumer = replay_recordings_consumer(timeout=10) +# replay_recording_bytes = b"{}\n[]" -# project_id = 42 # mini_sentry.add_basic_project_config( -# project_id, 
extra={"config": {"features": ["organizations:session-replay"]}} +# 42, +# extra={ +# "config": { +# "features": [ +# "organizations:session-replay", +# "organizations:session-replay-combined-envelope-items", +# ] +# } +# }, # ) -# replay_item = generate_replay_sdk_event() +# replay_id = "515539018c9b4260a6f999572f1661ee" + +# replay_event = generate_replay_sdk_event(replay_id=replay_id) + +# envelope = Envelope(headers=[["event_id", replay_id]]) +# envelope.add_item( +# Item(payload=PayloadRef(bytes=replay_recording_bytes), type="replay_recording") +# ) +# envelope.add_item(Item(payload=PayloadRef(json=replay_event), type="replay_event")) -# relay.send_replay_event(42, replay_item) +# relay.send_envelope(42, envelope) # envelope = mini_sentry.captured_events.get(timeout=20) # assert len(envelope.items) == 1 # replay_event = envelope.items[0] -# assert replay_event.type == "replay_event" +# assert replay_event.type == "replay_recording_not_chunked" diff --git a/tests/integration/test_replay_events.py b/tests/integration/test_replay_events.py index 14d8f35da5..0b87d44a93 100644 --- a/tests/integration/test_replay_events.py +++ b/tests/integration/test_replay_events.py @@ -96,7 +96,7 @@ def test_replay_event_with_processing( # Assert the tags and requests objects were normalized to lists of doubles. assert parsed_replay["tags"] == [["transaction", replay["tags"]["transaction"]]] assert parsed_replay["request"] == { - "headers": [["User-Agent", replay["request"]["headers"]["user-Agent"]]] + "headers": [["User-Agent", replay["request"]["headers"]["User-Agent"]]] } # Assert contexts object was pulled out. 
From d7d18bb0756de165740ba0f563e1f0c65e52130b Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Tue, 6 Jun 2023 18:16:15 -0700 Subject: [PATCH 07/72] remove unused test --- .../test_replay_combined_payload.py | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index 30a1c6eac5..b9eba64b10 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -1,40 +1,8 @@ -# import pytest -# import zlib - from sentry_sdk.envelope import Envelope, Item, PayloadRef import msgpack from .test_replay_events import generate_replay_sdk_event import json -# def test_replay_recordings(mini_sentry, relay_chain): -# relay = relay_chain(min_relay_version="latest") - -# project_id = 42 -# mini_sentry.add_basic_project_config( -# project_id, extra={"config": {"features": ["organizations:session-replay"]}} -# ) - -# replay_id = "515539018c9b4260a6f999572f1661ee" - -# replay_event = generate_replay_sdk_event(replay_id=replay_id) - -# envelope = Envelope(headers=[["event_id", replay_id]]) -# envelope.add_item( -# Item(payload=PayloadRef(bytes=b"{}\n[]"), type="replay_recording") -# ) -# envelope.add_item(Item(payload=PayloadRef(json=replay_event), type="replay_event")) - -# relay.send_envelope(project_id, envelope) - -# envelope = mini_sentry.captured_events.get(timeout=1) -# assert len(envelope.items) == 1 - -# replay_combined_item = envelope.items[0] -# assert session_item.type == "replay_recording" - -# replay_recording = session_item.get_bytes() -# assert replay_recording.startswith(b"{}\n") # The body is compressed - def test_replay_combined_with_processing( mini_sentry, relay_with_processing, replay_recordings_consumer From 5ad1304fa0c5094ea280fedd73e40307245f6773 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Tue, 30 Jan 2024 18:28:45 -0800 Subject: [PATCH 08/72] get it compiling --- 
relay-server/src/actors/processor.rs | 4032 ----------------- relay-server/src/services/processor/replay.rs | 171 +- 2 files changed, 170 insertions(+), 4033 deletions(-) delete mode 100644 relay-server/src/actors/processor.rs diff --git a/relay-server/src/actors/processor.rs b/relay-server/src/actors/processor.rs deleted file mode 100644 index 2050243bc7..0000000000 --- a/relay-server/src/actors/processor.rs +++ /dev/null @@ -1,4032 +0,0 @@ -use std::collections::BTreeMap; -use std::convert::TryFrom; -use std::error::Error; -use std::io::Write; -use std::net; -use std::net::IpAddr as NetIPAddr; -use std::sync::Arc; -use std::time::{Duration, Instant}; - -use brotli::CompressorWriter as BrotliEncoder; -use bytes::Bytes; -use chrono::{DateTime, Duration as SignedDuration, Utc}; -use flate2::write::{GzEncoder, ZlibEncoder}; -use flate2::Compression; -use once_cell::sync::OnceCell; -use serde_json::Value as SerdeValue; -use tokio::sync::Semaphore; - -use relay_auth::RelayVersion; -use relay_common::{ProjectId, ProjectKey, UnixTimestamp}; -use relay_config::{Config, HttpEncoding}; -use relay_dynamic_config::{ErrorBoundary, Feature, ProjectConfig, SessionMetricsConfig}; -use relay_filter::FilterStatKey; -use relay_general::pii::{PiiAttachmentsProcessor, PiiConfigError, PiiProcessor}; -use relay_general::processor::{process_value, ProcessingState}; -use relay_general::protocol::Context::Trace; -use relay_general::protocol::Contexts; -use relay_general::protocol::{ - self, Breadcrumb, ClientReport, Csp, Event, EventType, ExpectCt, ExpectStaple, Hpkp, IpAddr, - LenientString, Metrics, RelayInfo, Replay, ReplayError, SecurityReportType, SessionAggregates, - SessionAttributes, SessionStatus, SessionUpdate, Timestamp, TraceContext, UserReport, Values, -}; -use relay_general::store::{ - ClockDriftProcessor, LightNormalizationConfig, MeasurementsConfig, TransactionNameConfig, -}; -use relay_general::types::{Annotated, Array, Empty, FromValue, Object, ProcessingAction, Value}; 
-use relay_general::user_agent::RawUserAgentInfo; -use relay_metrics::{Bucket, InsertMetrics, MergeBuckets, Metric}; -use relay_quotas::{DataCategory, ReasonCode}; -use relay_redis::RedisPool; -use relay_replays::recording::RecordingScrubber; -use relay_sampling::{DynamicSamplingContext, MatchedRuleIds}; -use relay_statsd::metric; -use relay_system::{Addr, FromMessage, NoResponse, Service}; -#[cfg(feature = "processing")] -use { - crate::actors::envelopes::SendMetrics, - crate::actors::project_cache::UpdateRateLimits, - crate::service::ServiceError, - crate::utils::{EnvelopeLimiter, MetricsLimiter}, - anyhow::Context, - relay_general::protocol::{Context as SentryContext, ProfileContext}, - relay_general::store::{GeoIpLookup, StoreConfig, StoreProcessor}, - relay_quotas::{RateLimitingError, RedisRateLimiter}, - symbolic_unreal::{Unreal4Error, Unreal4ErrorKind}, -}; - -use crate::actors::envelopes::{EnvelopeManager, SendEnvelope, SendEnvelopeError, SubmitEnvelope}; -use crate::actors::outcome::{DiscardReason, Outcome, TrackOutcome}; -use crate::actors::project::ProjectState; -use crate::actors::project_cache::ProjectCache; -use crate::actors::upstream::{SendRequest, UpstreamRelay}; -use crate::envelope::{AttachmentType, ContentType, Envelope, Item, ItemType}; -use crate::extractors::RequestMeta; -use crate::metrics_extraction::sessions::extract_session_metrics; -use crate::metrics_extraction::transactions::extract_transaction_metrics; -use crate::metrics_extraction::transactions::types::ExtractMetricsError; -use crate::statsd::{RelayCounters, RelayHistograms, RelayTimers}; -use crate::utils::{ - self, get_sampling_key, log_transaction_name_metrics, ChunkedFormDataAggregator, FormDataIter, - ItemAction, ManagedEnvelope, SamplingResult, -}; - -/// The minimum clock drift for correction to apply. -const MINIMUM_CLOCK_DRIFT: Duration = Duration::from_secs(55 * 60); - -/// An error returned when handling [`ProcessEnvelope`]. 
-#[derive(Debug, thiserror::Error)] -pub enum ProcessingError { - #[error("invalid json in event")] - InvalidJson(#[source] serde_json::Error), - - #[error("invalid message pack event payload")] - InvalidMsgpack(#[from] rmp_serde::decode::Error), - - #[cfg(feature = "processing")] - #[error("invalid unreal crash report")] - InvalidUnrealReport(#[source] Unreal4Error), - - #[error("event payload too large")] - PayloadTooLarge, - - #[error("invalid transaction event")] - InvalidTransaction, - - #[error("envelope processor failed")] - ProcessingFailed(#[from] ProcessingAction), - - #[error("duplicate {0} in event")] - DuplicateItem(ItemType), - - #[error("failed to extract event payload")] - NoEventPayload, - - #[error("missing project id in DSN")] - MissingProjectId, - - #[error("invalid security report type")] - InvalidSecurityType, - - #[error("invalid security report")] - InvalidSecurityReport(#[source] serde_json::Error), - - #[error("event filtered with reason: {0:?}")] - EventFiltered(FilterStatKey), - - #[error("missing or invalid required event timestamp")] - InvalidTimestamp, - - #[error("could not serialize event payload")] - SerializeFailed(#[source] serde_json::Error), - - #[cfg(feature = "processing")] - #[error("failed to apply quotas")] - QuotasFailed(#[from] RateLimitingError), - - #[error("event dropped by sampling rule {0}")] - Sampled(MatchedRuleIds), - - #[error("invalid pii config")] - PiiConfigError(PiiConfigError), -} - -impl ProcessingError { - fn to_outcome(&self) -> Option { - match *self { - // General outcomes for invalid events - Self::PayloadTooLarge => Some(Outcome::Invalid(DiscardReason::TooLarge)), - Self::InvalidJson(_) => Some(Outcome::Invalid(DiscardReason::InvalidJson)), - Self::InvalidMsgpack(_) => Some(Outcome::Invalid(DiscardReason::InvalidMsgpack)), - Self::InvalidSecurityType => Some(Outcome::Invalid(DiscardReason::SecurityReportType)), - Self::InvalidSecurityReport(_) => Some(Outcome::Invalid(DiscardReason::SecurityReport)), 
- Self::InvalidTransaction => Some(Outcome::Invalid(DiscardReason::InvalidTransaction)), - Self::InvalidTimestamp => Some(Outcome::Invalid(DiscardReason::Timestamp)), - Self::DuplicateItem(_) => Some(Outcome::Invalid(DiscardReason::DuplicateItem)), - Self::NoEventPayload => Some(Outcome::Invalid(DiscardReason::NoEventPayload)), - - // Processing-only outcomes (Sentry-internal Relays) - #[cfg(feature = "processing")] - Self::InvalidUnrealReport(ref err) - if err.kind() == Unreal4ErrorKind::BadCompression => - { - Some(Outcome::Invalid(DiscardReason::InvalidCompression)) - } - #[cfg(feature = "processing")] - Self::InvalidUnrealReport(_) => Some(Outcome::Invalid(DiscardReason::ProcessUnreal)), - - // Internal errors - Self::SerializeFailed(_) | Self::ProcessingFailed(_) => { - Some(Outcome::Invalid(DiscardReason::Internal)) - } - #[cfg(feature = "processing")] - Self::QuotasFailed(_) => Some(Outcome::Invalid(DiscardReason::Internal)), - Self::PiiConfigError(_) => Some(Outcome::Invalid(DiscardReason::ProjectStatePii)), - - // These outcomes are emitted at the source. - Self::MissingProjectId => None, - Self::EventFiltered(_) => None, - Self::Sampled(_) => None, - } - } - - fn is_unexpected(&self) -> bool { - self.to_outcome() - .map_or(false, |outcome| outcome.is_unexpected()) - } - - fn should_keep_metrics(&self) -> bool { - matches!(self, Self::Sampled(_)) - } -} - -#[cfg(feature = "processing")] -impl From for ProcessingError { - fn from(err: Unreal4Error) -> Self { - match err.kind() { - Unreal4ErrorKind::TooLarge => Self::PayloadTooLarge, - _ => ProcessingError::InvalidUnrealReport(err), - } - } -} - -impl From for ProcessingError { - fn from(error: ExtractMetricsError) -> Self { - match error { - ExtractMetricsError::MissingTimestamp | ExtractMetricsError::InvalidTimestamp => { - Self::InvalidTimestamp - } - } - } -} - -type ExtractedEvent = (Annotated, usize); - -/// Checks if the Event includes unprintable fields. 
-#[cfg(feature = "processing")] -fn has_unprintable_fields(event: &Annotated) -> bool { - fn is_unprintable(value: &&str) -> bool { - value.chars().any(|c| { - c == '\u{fffd}' // unicode replacement character - || (c.is_control() && !c.is_whitespace()) // non-whitespace control characters - }) - } - if let Some(event) = event.value() { - let env = event.environment.as_str().filter(is_unprintable); - let release = event.release.as_str().filter(is_unprintable); - env.is_some() || release.is_some() - } else { - false - } -} - -#[derive(Debug, Default)] -struct ExtractedMetrics { - /// Metrics associated with the project that the transaction belongs to. - project_metrics: Vec, - /// Metrics associated with the sampling project (a.k.a. root or head project) - /// which started the trace. See [`ProcessEnvelopeState::sampling_project_state`]. - sampling_metrics: Vec, -} - -impl ExtractedMetrics { - fn send_metrics(self, envelope: &Envelope, project_cache: Addr) { - let project_key = envelope.meta().public_key(); - - if !self.project_metrics.is_empty() { - project_cache.send(InsertMetrics::new(project_key, self.project_metrics)); - } - - if !self.sampling_metrics.is_empty() { - // If no sampling project state is available, we associate the sampling - // metrics with the current project. - // - // project_without_tracing -> metrics goes to self - // dependent_project_with_tracing -> metrics goes to root - // root_project_with_tracing -> metrics goes to root == self - let sampling_project_key = get_sampling_key(envelope).unwrap_or(project_key); - project_cache.send(InsertMetrics::new( - sampling_project_key, - self.sampling_metrics, - )); - } - } -} - -/// A state container for envelope processing. -#[derive(Debug)] -struct ProcessEnvelopeState { - /// The extracted event payload. - /// - /// For Envelopes without event payloads, this contains `Annotated::empty`. 
If a single item has - /// `creates_event`, the event is required and the pipeline errors if no payload can be - /// extracted. - event: Annotated, - - /// Track whether transaction metrics were already extracted. - transaction_metrics_extracted: bool, - - /// Partial metrics of the Event during construction. - /// - /// The pipeline stages can add to this metrics objects. In `finalize_event`, the metrics are - /// persisted into the Event. All modifications afterwards will have no effect. - metrics: Metrics, - - /// A list of cumulative sample rates applied to this event. - /// - /// This element is obtained from the event or transaction item and re-serialized into the - /// resulting item. - sample_rates: Option, - - /// The result of a dynamic sampling operation on this envelope. - /// - /// This defaults to [`SamplingResult::Keep`] and is determined based on dynamic sampling rules - /// in the project configuration. In the drop case, this contains a list of rules that applied - /// on the envelope. - sampling_result: SamplingResult, - - /// Metrics extracted from items in the envelope. - /// - /// Relay can extract metrics for sessions and transactions, which is controlled by - /// configuration objects in the project config. - extracted_metrics: ExtractedMetrics, - - /// The state of the project that this envelope belongs to. - project_state: Arc, - - /// The state of the project that initiated the current trace. - /// This is the config used for trace-based dynamic sampling. - sampling_project_state: Option>, - - /// The id of the project that this envelope is ingested into. - /// - /// This identifier can differ from the one stated in the Envelope's DSN if the key was moved to - /// a new project or on the legacy endpoint. In that case, normalization will update the project - /// ID. - project_id: ProjectId, - - /// The managed envelope before processing. 
- managed_envelope: ManagedEnvelope, -} - -impl ProcessEnvelopeState { - /// Returns a reference to the contained [`Envelope`]. - fn envelope(&self) -> &Envelope { - self.managed_envelope.envelope() - } - - /// Returns a mutable reference to the contained [`Envelope`]. - fn envelope_mut(&mut self) -> &mut Envelope { - self.managed_envelope.envelope_mut() - } - - /// Returns whether any item in the envelope creates an event in any relay. - /// - /// This is used to branch into the processing pipeline. If this function returns false, only - /// rate limits are executed. If this function returns true, an event is created either in the - /// current relay or in an upstream processing relay. - fn creates_event(&self) -> bool { - self.envelope().items().any(Item::creates_event) - } - - /// Returns true if there is an event in the processing state. - /// - /// The event was previously removed from the Envelope. This returns false if there was an - /// invalid event item. - fn has_event(&self) -> bool { - self.event.value().is_some() - } - - /// Returns the event type if there is an event. - /// - /// If the event does not have a type, `Some(EventType::Default)` is assumed. If, in contrast, there - /// is no event, `None` is returned. - fn event_type(&self) -> Option { - self.event - .value() - .map(|event| event.ty.value().copied().unwrap_or_default()) - } - - /// Returns the data category if there is an event. - /// - /// The data category is computed from the event type. Both `Default` and `Error` events map to - /// the `Error` data category. If there is no Event, `None` is returned. - fn event_category(&self) -> Option { - self.event_type().map(DataCategory::from) - } - - /// Removes the event payload from this processing state. - #[cfg(feature = "processing")] - fn remove_event(&mut self) { - self.event = Annotated::empty(); - } -} - -/// Fields of client reports that map to specific [`Outcome`]s without content. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -enum ClientReportField { - /// The event has been filtered by an inbound data filter. - Filtered, - - /// The event has been filtered by a sampling rule. - FilteredSampling, - - /// The event has been rate limited. - RateLimited, - - /// The event has already been discarded on the client side. - ClientDiscard, -} - -/// Parse an outcome from an outcome ID and a reason string. -/// -/// Currently only used to reconstruct outcomes encoded in client reports. -fn outcome_from_parts(field: ClientReportField, reason: &str) -> Result { - match field { - ClientReportField::FilteredSampling => match reason.strip_prefix("Sampled:") { - Some(rule_ids) => MatchedRuleIds::from_string(rule_ids) - .map(Outcome::FilteredSampling) - .map_err(|_| ()), - None => Err(()), - }, - ClientReportField::ClientDiscard => Ok(Outcome::ClientDiscard(reason.into())), - ClientReportField::Filtered => Ok(Outcome::Filtered( - FilterStatKey::try_from(reason).map_err(|_| ())?, - )), - ClientReportField::RateLimited => Ok(Outcome::RateLimited(match reason { - "" => None, - other => Some(ReasonCode::new(other)), - })), - } -} - -/// Response of the [`ProcessEnvelope`] message. -#[cfg_attr(not(feature = "processing"), allow(dead_code))] -pub struct ProcessEnvelopeResponse { - /// The processed envelope. - /// - /// This is `Some` if the envelope passed inbound filtering and rate limiting. Invalid items are - /// removed from the envelope. Otherwise, if the envelope is empty or the entire envelope needs - /// to be dropped, this is `None`. - pub envelope: Option, -} - -/// Applies processing to all contents of the given envelope. -/// -/// Depending on the contents of the envelope and Relay's mode, this includes: -/// -/// - Basic normalization and validation for all item types. -/// - Clock drift correction if the required `sent_at` header is present. -/// - Expansion of certain item types (e.g. unreal). 
-/// - Store normalization for event payloads in processing mode. -/// - Rate limiters and inbound filters on events in processing mode. -#[derive(Debug)] -pub struct ProcessEnvelope { - pub envelope: ManagedEnvelope, - pub project_state: Arc, - pub sampling_project_state: Option>, -} - -/// Parses a list of metrics or metric buckets and pushes them to the project's aggregator. -/// -/// This parses and validates the metrics: -/// - For [`Metrics`](ItemType::Metrics), each metric is parsed separately, and invalid metrics are -/// ignored independently. -/// - For [`MetricBuckets`](ItemType::MetricBuckets), the entire list of buckets is parsed and -/// dropped together on parsing failure. -/// - Other items will be ignored with an error message. -/// -/// Additionally, processing applies clock drift correction using the system clock of this Relay, if -/// the Envelope specifies the [`sent_at`](Envelope::sent_at) header. -#[derive(Debug)] -pub struct ProcessMetrics { - /// A list of metric items. - pub items: Vec, - - /// The target project. - pub project_key: ProjectKey, - - /// The instant at which the request was received. - pub start_time: Instant, - - /// The value of the Envelope's [`sent_at`](Envelope::sent_at) header for clock drift - /// correction. - pub sent_at: Option>, -} - -/// Applies HTTP content encoding to an envelope's payload. -/// -/// This message is a workaround for a single-threaded upstream service. -#[derive(Debug)] -pub struct EncodeEnvelope { - request: SendEnvelope, -} - -impl EncodeEnvelope { - /// Creates a new `EncodeEnvelope` message from `SendEnvelope` request. - pub fn new(request: SendEnvelope) -> Self { - Self { request } - } -} - -/// Applies rate limits to metrics buckets and forwards them to the envelope manager. -#[cfg(feature = "processing")] -#[derive(Debug)] -pub struct RateLimitFlushBuckets { - pub bucket_limiter: MetricsLimiter, - pub partition_key: Option, -} - -/// CPU-intensive processing tasks for envelopes. 
-#[derive(Debug)] -pub enum EnvelopeProcessor { - ProcessEnvelope(Box), - ProcessMetrics(Box), - EncodeEnvelope(Box), - #[cfg(feature = "processing")] - RateLimitFlushBuckets(RateLimitFlushBuckets), -} - -impl relay_system::Interface for EnvelopeProcessor {} - -impl FromMessage for EnvelopeProcessor { - type Response = relay_system::NoResponse; - - fn from_message(message: ProcessEnvelope, _sender: ()) -> Self { - Self::ProcessEnvelope(Box::new(message)) - } -} - -impl FromMessage for EnvelopeProcessor { - type Response = NoResponse; - - fn from_message(message: ProcessMetrics, _: ()) -> Self { - Self::ProcessMetrics(Box::new(message)) - } -} - -impl FromMessage for EnvelopeProcessor { - type Response = NoResponse; - - fn from_message(message: EncodeEnvelope, _: ()) -> Self { - Self::EncodeEnvelope(Box::new(message)) - } -} - -#[cfg(feature = "processing")] -impl FromMessage for EnvelopeProcessor { - type Response = NoResponse; - - fn from_message(message: RateLimitFlushBuckets, _: ()) -> Self { - Self::RateLimitFlushBuckets(message) - } -} - -/// Service implementing the [`EnvelopeProcessor`] interface. -/// -/// This service handles messages in a worker pool with configurable concurrency. -pub struct EnvelopeProcessorService { - config: Arc, - envelope_manager: Addr, - project_cache: Addr, - outcome_aggregator: Addr, - upstream_relay: Addr, - #[cfg(feature = "processing")] - rate_limiter: Option, - #[cfg(feature = "processing")] - geoip_lookup: Option, -} - -impl EnvelopeProcessorService { - /// Creates a multi-threaded envelope processor. 
- pub fn new( - config: Arc, - _redis: Option, - envelope_manager: Addr, - outcome_aggregator: Addr, - project_cache: Addr, - upstream_relay: Addr, - ) -> anyhow::Result { - #[cfg(feature = "processing")] - { - let geoip_lookup = match config.geoip_path() { - Some(p) => Some(GeoIpLookup::open(p).context(ServiceError::GeoIp)?), - None => None, - }; - - let rate_limiter = - _redis.map(|pool| RedisRateLimiter::new(pool).max_limit(config.max_rate_limit())); - - Ok(Self { - config, - rate_limiter, - geoip_lookup, - envelope_manager, - outcome_aggregator, - project_cache, - upstream_relay, - }) - } - - #[cfg(not(feature = "processing"))] - Ok(Self { - config, - envelope_manager, - outcome_aggregator, - project_cache, - upstream_relay, - }) - } - - /// Returns Ok(true) if attributes were modified. - /// Returns Err if the session should be dropped. - fn validate_attributes( - &self, - client_addr: &Option, - attributes: &mut SessionAttributes, - ) -> Result { - let mut changed = false; - - let release = &attributes.release; - if let Err(e) = protocol::validate_release(release) { - relay_log::trace!( - error = &e as &dyn Error, - release, - "skipping session with invalid release" - ); - return Err(()); - } - - if let Some(ref env) = attributes.environment { - if let Err(e) = protocol::validate_environment(env) { - relay_log::trace!( - error = &e as &dyn Error, - env, - "removing invalid environment" - ); - attributes.environment = None; - changed = true; - } - } - - if let Some(ref ip_address) = attributes.ip_address { - if ip_address.is_auto() { - attributes.ip_address = client_addr.map(IpAddr::from); - changed = true; - } - } - - Ok(changed) - } - - fn is_valid_session_timestamp( - &self, - received: DateTime, - timestamp: DateTime, - ) -> bool { - let max_age = SignedDuration::seconds(self.config.max_session_secs_in_past()); - if (received - timestamp) > max_age { - relay_log::trace!("skipping session older than {} days", max_age.num_days()); - return false; - } - - let 
max_future = SignedDuration::seconds(self.config.max_secs_in_future()); - if (timestamp - received) > max_future { - relay_log::trace!( - "skipping session more than {}s in the future", - max_future.num_seconds() - ); - return false; - } - - true - } - - /// Returns true if the item should be kept. - #[allow(clippy::too_many_arguments)] - fn process_session( - &self, - item: &mut Item, - received: DateTime, - client: Option<&str>, - client_addr: Option, - metrics_config: SessionMetricsConfig, - clock_drift_processor: &ClockDriftProcessor, - extracted_metrics: &mut Vec, - ) -> bool { - let mut changed = false; - let payload = item.payload(); - - // sessionupdate::parse is already tested - let mut session = match SessionUpdate::parse(&payload) { - Ok(session) => session, - Err(error) => { - relay_log::trace!( - error = &error as &dyn Error, - "skipping invalid session payload" - ); - return false; - } - }; - - if session.sequence == u64::MAX { - relay_log::trace!("skipping session due to sequence overflow"); - return false; - }; - - if clock_drift_processor.is_drifted() { - relay_log::trace!("applying clock drift correction to session"); - clock_drift_processor.process_datetime(&mut session.started); - clock_drift_processor.process_datetime(&mut session.timestamp); - changed = true; - } - - if session.timestamp < session.started { - relay_log::trace!("fixing session timestamp to {}", session.timestamp); - session.timestamp = session.started; - changed = true; - } - - // Log the timestamp delay for all sessions after clock drift correction. 
- let session_delay = received - session.timestamp; - if session_delay > SignedDuration::minutes(1) { - metric!( - timer(RelayTimers::TimestampDelay) = session_delay.to_std().unwrap(), - category = "session", - ); - } - - // Validate timestamps - for t in [session.timestamp, session.started] { - if !self.is_valid_session_timestamp(received, t) { - return false; - } - } - - // Validate attributes - match self.validate_attributes(&client_addr, &mut session.attributes) { - Err(_) => return false, - Ok(changed_attributes) => { - changed |= changed_attributes; - } - } - - if self.config.processing_enabled() && matches!(session.status, SessionStatus::Unknown(_)) { - return false; - } - - // Extract metrics if they haven't been extracted by a prior Relay - if metrics_config.is_enabled() - && !item.metrics_extracted() - && !matches!(session.status, SessionStatus::Unknown(_)) - { - extract_session_metrics( - &session.attributes, - &session, - client, - extracted_metrics, - metrics_config.should_extract_abnormal_mechanism(), - ); - item.set_metrics_extracted(true); - } - - // Drop the session if metrics have been extracted in this or a prior Relay - if metrics_config.should_drop() && item.metrics_extracted() { - return false; - } - - if changed { - let json_string = match serde_json::to_string(&session) { - Ok(json) => json, - Err(err) => { - relay_log::error!(error = &err as &dyn Error, "failed to serialize session"); - return false; - } - }; - - item.set_payload(ContentType::Json, json_string); - } - - true - } - - #[allow(clippy::too_many_arguments)] - fn process_session_aggregates( - &self, - item: &mut Item, - received: DateTime, - client: Option<&str>, - client_addr: Option, - metrics_config: SessionMetricsConfig, - clock_drift_processor: &ClockDriftProcessor, - extracted_metrics: &mut Vec, - ) -> bool { - let mut changed = false; - let payload = item.payload(); - - let mut session = match SessionAggregates::parse(&payload) { - Ok(session) => session, - Err(error) => { 
- relay_log::trace!( - error = &error as &dyn Error, - "skipping invalid sessions payload" - ); - return false; - } - }; - - if clock_drift_processor.is_drifted() { - relay_log::trace!("applying clock drift correction to session"); - for aggregate in &mut session.aggregates { - clock_drift_processor.process_datetime(&mut aggregate.started); - } - changed = true; - } - - // Validate timestamps - session - .aggregates - .retain(|aggregate| self.is_valid_session_timestamp(received, aggregate.started)); - - // Aftter timestamp validation, aggregates could now be empty - if session.aggregates.is_empty() { - return false; - } - - // Validate attributes - match self.validate_attributes(&client_addr, &mut session.attributes) { - Err(_) => return false, - Ok(changed_attributes) => { - changed |= changed_attributes; - } - } - - // Extract metrics if they haven't been extracted by a prior Relay - if metrics_config.is_enabled() && !item.metrics_extracted() { - for aggregate in &session.aggregates { - extract_session_metrics( - &session.attributes, - aggregate, - client, - extracted_metrics, - metrics_config.should_extract_abnormal_mechanism(), - ); - item.set_metrics_extracted(true); - } - } - - // Drop the aggregate if metrics have been extracted in this or a prior Relay - if metrics_config.should_drop() && item.metrics_extracted() { - return false; - } - - if changed { - let json_string = match serde_json::to_string(&session) { - Ok(json) => json, - Err(err) => { - relay_log::error!(error = &err as &dyn Error, "failed to serialize session"); - return false; - } - }; - - item.set_payload(ContentType::Json, json_string); - } - - true - } - - /// Validates all sessions and session aggregates in the envelope, if any. - /// - /// Both are removed from the envelope if they contain invalid JSON or if their timestamps - /// are out of range after clock drift correction. 
- fn process_sessions(&self, state: &mut ProcessEnvelopeState) { - let received = state.managed_envelope.received_at(); - let extracted_metrics = &mut state.extracted_metrics.project_metrics; - let metrics_config = state.project_state.config().session_metrics; - let envelope = state.managed_envelope.envelope_mut(); - let client = envelope.meta().client().map(|x| x.to_owned()); - let client_addr = envelope.meta().client_addr(); - - let clock_drift_processor = - ClockDriftProcessor::new(envelope.sent_at(), received).at_least(MINIMUM_CLOCK_DRIFT); - - state.managed_envelope.retain_items(|item| { - let should_keep = match item.ty() { - ItemType::Session => self.process_session( - item, - received, - client.as_deref(), - client_addr, - metrics_config, - &clock_drift_processor, - extracted_metrics, - ), - ItemType::Sessions => self.process_session_aggregates( - item, - received, - client.as_deref(), - client_addr, - metrics_config, - &clock_drift_processor, - extracted_metrics, - ), - _ => true, // Keep all other item types - }; - if should_keep { - ItemAction::Keep - } else { - ItemAction::DropSilently // sessions never log outcomes. - } - }); - } - - /// Validates and normalizes all user report items in the envelope. - /// - /// User feedback items are removed from the envelope if they contain invalid JSON or if the - /// JSON violates the schema (basic type validation). Otherwise, their normalized representation - /// is written back into the item. 
- fn process_user_reports(&self, state: &mut ProcessEnvelopeState) { - state.managed_envelope.retain_items(|item| { - if item.ty() != &ItemType::UserReport { - return ItemAction::Keep; - }; - - let report = match serde_json::from_slice::(&item.payload()) { - Ok(session) => session, - Err(error) => { - relay_log::error!(error = &error as &dyn Error, "failed to store user report"); - return ItemAction::DropSilently; - } - }; - - let json_string = match serde_json::to_string(&report) { - Ok(json) => json, - Err(err) => { - relay_log::error!( - error = &err as &dyn Error, - "failed to serialize user report" - ); - return ItemAction::DropSilently; - } - }; - - item.set_payload(ContentType::Json, json_string); - ItemAction::Keep - }); - } - - /// Validates and extracts client reports. - /// - /// At the moment client reports are primarily used to transfer outcomes from - /// client SDKs. The outcomes are removed here and sent directly to the outcomes - /// system. - fn process_client_reports(&self, state: &mut ProcessEnvelopeState) { - // if client outcomes are disabled we leave the the client reports unprocessed - // and pass them on. - if !self.config.emit_outcomes().any() || !self.config.emit_client_outcomes() { - // if a processing relay has client outcomes disabled we drop them. - if self.config.processing_enabled() { - state.managed_envelope.retain_items(|item| match item.ty() { - ItemType::ClientReport => ItemAction::DropSilently, - _ => ItemAction::Keep, - }); - } - return; - } - - let mut timestamp = None; - let mut output_events = BTreeMap::new(); - let received = state.managed_envelope.received_at(); - - let clock_drift_processor = ClockDriftProcessor::new(state.envelope().sent_at(), received) - .at_least(MINIMUM_CLOCK_DRIFT); - - // we're going through all client reports but we're effectively just merging - // them into the first one. 
- state.managed_envelope.retain_items(|item| { - if item.ty() != &ItemType::ClientReport { - return ItemAction::Keep; - }; - match ClientReport::parse(&item.payload()) { - Ok(ClientReport { - timestamp: report_timestamp, - discarded_events, - rate_limited_events, - filtered_events, - filtered_sampling_events, - }) => { - // Glue all discarded events together and give them the appropriate outcome type - let input_events = discarded_events - .into_iter() - .map(|discarded_event| (ClientReportField::ClientDiscard, discarded_event)) - .chain( - filtered_events.into_iter().map(|discarded_event| { - (ClientReportField::Filtered, discarded_event) - }), - ) - .chain(filtered_sampling_events.into_iter().map(|discarded_event| { - (ClientReportField::FilteredSampling, discarded_event) - })) - .chain(rate_limited_events.into_iter().map(|discarded_event| { - (ClientReportField::RateLimited, discarded_event) - })); - - for (outcome_type, discarded_event) in input_events { - if discarded_event.reason.len() > 200 { - relay_log::trace!("ignored client outcome with an overlong reason"); - continue; - } - *output_events - .entry(( - outcome_type, - discarded_event.reason, - discarded_event.category, - )) - .or_insert(0) += discarded_event.quantity; - } - if let Some(ts) = report_timestamp { - timestamp.get_or_insert(ts); - } - } - Err(err) => { - relay_log::trace!(error = &err as &dyn Error, "invalid client report received") - } - } - ItemAction::DropSilently - }); - - if output_events.is_empty() { - return; - } - - let timestamp = - timestamp.get_or_insert_with(|| UnixTimestamp::from_secs(received.timestamp() as u64)); - - if clock_drift_processor.is_drifted() { - relay_log::trace!("applying clock drift correction to client report"); - clock_drift_processor.process_timestamp(timestamp); - } - - let max_age = SignedDuration::seconds(self.config.max_secs_in_past()); - // also if we unable to parse the timestamp, we assume it's way too old here. 
- let in_past = timestamp - .as_datetime() - .map(|ts| (received - ts) > max_age) - .unwrap_or(true); - if in_past { - relay_log::trace!( - "skipping client outcomes older than {} days", - max_age.num_days() - ); - return; - } - - let max_future = SignedDuration::seconds(self.config.max_secs_in_future()); - // also if we unable to parse the timestamp, we assume it's way far in the future here. - let in_future = timestamp - .as_datetime() - .map(|ts| (ts - received) > max_future) - .unwrap_or(true); - if in_future { - relay_log::trace!( - "skipping client outcomes more than {}s in the future", - max_future.num_seconds() - ); - return; - } - - for ((outcome_type, reason, category), quantity) in output_events.into_iter() { - let outcome = match outcome_from_parts(outcome_type, &reason) { - Ok(outcome) => outcome, - Err(_) => { - relay_log::trace!(?outcome_type, reason, "invalid outcome combination"); - continue; - } - }; - - self.outcome_aggregator.send(TrackOutcome { - // If we get to this point, the unwrap should not be used anymore, since we know by - // now that the timestamp can be parsed, but just incase we fallback to UTC current - // `DateTime`. - timestamp: timestamp.as_datetime().unwrap_or_else(Utc::now), - scoping: state.managed_envelope.scoping(), - outcome, - event_id: None, - remote_addr: None, // omitting the client address allows for better aggregation - category, - quantity, - }); - } - } - - /// Remove profiles from the envelope if they can not be parsed - fn filter_profiles(&self, state: &mut ProcessEnvelopeState) { - state.managed_envelope.retain_items(|item| match item.ty() { - ItemType::Profile => match relay_profiling::parse_metadata(&item.payload()) { - Ok(_) => ItemAction::Keep, - Err(err) => ItemAction::Drop(Outcome::Invalid(DiscardReason::Profiling( - relay_profiling::discard_reason(err), - ))), - }, - _ => ItemAction::Keep, - }); - } - - /// Normalize monitor check-ins and remove invalid ones. 
- #[cfg(feature = "processing")] - fn process_check_ins(&self, state: &mut ProcessEnvelopeState) { - state.managed_envelope.retain_items(|item| { - if item.ty() != &ItemType::CheckIn { - return ItemAction::Keep; - } - - match relay_monitors::process_check_in(&item.payload()) { - Ok(processed) => { - item.set_payload(ContentType::Json, processed); - ItemAction::Keep - } - Err(error) => { - // TODO: Track an outcome. - relay_log::debug!( - error = &error as &dyn Error, - "dropped invalid monitor check-in" - ); - ItemAction::DropSilently - } - } - }) - } - - /// Count the number of profiles that are in the envelope and emit accepted outcome. - /// - /// "processed" profiles are an abstract data category that does not represent actual data - /// going through our pipeline. Instead, the number of accepted "processed" profiles is counted as - /// - /// ```text - /// processed_profiles = indexed_profiles + sampled_profiles - /// ``` - /// - /// The "processed" outcome for sampled profiles is generated by the Kafka producer - /// (see `transform_outcome` in [`crate::actors::store`]), but for "indexed" profiles, we count - /// the corresponding number of processed profiles here. - /// - /// NOTE: Instead of emitting a [processed](`DataCategory::Profile`) outcome here, - /// we could also do it in sentry, in the same place where the [indexed](`DataCategory::ProfileIndexed`) - /// outcome is logged. We do it here to be consistent with profiles that are dropped by dynamic sampling, - /// which also count as "processed" even though they did not pass through the `process_profiles` step yet. - /// - /// - /// In the future, we might actually extract metrics from profiles before dynamic sampling, - /// like we do for transactions. At that point, this code should be removed, and we should - /// enforce rate limits and emit outcomes based on the collect profile metric, as we do for - /// transactions. 
- #[cfg(feature = "processing")] - fn count_processed_profiles(&self, state: &mut ProcessEnvelopeState) { - let profile_count: usize = state - .managed_envelope - .envelope_mut() - .items_mut() - .filter(|item| item.ty() == &ItemType::Profile) - .map(|item| { - item.set_profile_counted_as_processed(); - item.quantity() - }) - .sum(); - - if profile_count == 0 { - return; - } - - self.outcome_aggregator.send(TrackOutcome { - timestamp: state.managed_envelope.received_at(), - scoping: state.managed_envelope.scoping(), - outcome: Outcome::Accepted, - event_id: None, - remote_addr: None, - category: DataCategory::Profile, - quantity: profile_count as u32, // truncates to `u32::MAX` - }); - - // TODO: At this point, we should also ensure that the envelope summary gets recomputed. - // But recomputing the summary after extracting the event is currently problematic, because it - // sets the envelope type to `None`. This needs to be solved in a follow-up. - } - - /// Process profiles and set the profile ID in the profile context on the transaction if successful - #[cfg(feature = "processing")] - fn process_profiles(&self, state: &mut ProcessEnvelopeState) { - state.managed_envelope.retain_items(|item| match item.ty() { - ItemType::Profile => { - match relay_profiling::expand_profile(&item.payload(), state.event.value()) { - Ok((profile_id, payload)) => { - if payload.len() <= self.config.max_profile_size() { - if let Some(event) = state.event.value_mut() { - if event.ty.value() == Some(&EventType::Transaction) { - let contexts = event.contexts.get_or_insert_with(Contexts::new); - contexts.add(SentryContext::Profile(Box::new( - ProfileContext { - profile_id: Annotated::new(profile_id), - }, - ))); - } - } - item.set_payload(ContentType::Json, payload); - ItemAction::Keep - } else { - if let Some(event) = state.event.value_mut() { - if event.ty.value() == Some(&EventType::Transaction) { - let contexts = event.contexts.get_or_insert_with(Contexts::new); - 
contexts.remove(ProfileContext::default_key()); - } - } - ItemAction::Drop(Outcome::Invalid(DiscardReason::Profiling( - relay_profiling::discard_reason( - relay_profiling::ProfileError::ExceedSizeLimit, - ), - ))) - } - } - Err(err) => { - if let Some(event) = state.event.value_mut() { - if event.ty.value() == Some(&EventType::Transaction) { - let contexts = event.contexts.get_or_insert_with(Contexts::new); - contexts.remove(ProfileContext::default_key()); - } - } - - match err { - relay_profiling::ProfileError::InvalidJson(_) => { - relay_log::warn!(error = &err as &dyn Error, "invalid profile"); - } - _ => relay_log::debug!(error = &err as &dyn Error, "invalid profile"), - }; - ItemAction::Drop(Outcome::Invalid(DiscardReason::Profiling( - relay_profiling::discard_reason(err), - ))) - } - } - } - _ => ItemAction::Keep, - }); - } - - /// Remove replays if the feature flag is not enabled. - fn process_replays(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let project_state = &state.project_state; - let replays_enabled = project_state.has_feature(Feature::SessionReplay); - let scrubbing_enabled = project_state.has_feature(Feature::SessionReplayRecordingScrubbing); - - let meta = state.envelope().meta().clone(); - let client_addr = meta.client_addr(); - let event_id = state.envelope().event_id(); - - let limit = self.config.max_replay_uncompressed_size(); - let config = project_state.config(); - let datascrubbing_config = config - .datascrubbing_settings - .pii_config() - .map_err(|e| ProcessingError::PiiConfigError(e.clone()))? 
- .as_ref(); - let mut scrubber = - RecordingScrubber::new(limit, config.pii_config.as_ref(), datascrubbing_config); - - let user_agent = &RawUserAgentInfo { - user_agent: meta.user_agent(), - client_hints: meta.client_hints().as_deref(), - }; - - state.managed_envelope.retain_items(|item| match item.ty() { - ItemType::ReplayEvent => { - if !replays_enabled { - return ItemAction::DropSilently; - } - - match self.process_replay_event(&item.payload(), config, client_addr, user_agent) { - Ok(replay) => match replay.to_json() { - Ok(json) => { - item.set_payload(ContentType::Json, json); - ItemAction::Keep - } - Err(error) => { - relay_log::error!( - error = &error as &dyn Error, - "failed to serialize replay" - ); - ItemAction::Keep - } - }, - Err(error) => { - relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); - ItemAction::Drop(Outcome::Invalid(match error { - ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, - ReplayError::CouldNotScrub(_) => DiscardReason::InvalidReplayEventPii, - ReplayError::CouldNotParse(_) => DiscardReason::InvalidReplayEvent, - ReplayError::InvalidPayload(_) => DiscardReason::InvalidReplayEvent, - })) - } - } - } - ItemType::ReplayRecording => { - if !replays_enabled { - return ItemAction::DropSilently; - } - - // XXX: Processing is there just for data scrubbing. Skip the entire expensive - // processing step if we do not need to scrub. - if !scrubbing_enabled || scrubber.is_empty() { - return ItemAction::Keep; - } - - // Limit expansion of recordings to the max replay size. The payload is - // decompressed temporarily and then immediately re-compressed. However, to - // limit memory pressure, we use the replay limit as a good overall limit for - // allocations. 
- let parsed_recording = metric!(timer(RelayTimers::ReplayRecordingProcessing), { - scrubber.process_recording(&item.payload()) - }); - - match parsed_recording { - Ok(recording) => { - item.set_payload(ContentType::OctetStream, recording); - ItemAction::Keep - } - Err(e) => { - relay_log::warn!("replay-recording-event: {e} {event_id:?}"); - ItemAction::Drop(Outcome::Invalid( - DiscardReason::InvalidReplayRecordingEvent, - )) - } - } - } - _ => ItemAction::Keep, - }); - - Ok(()) - } - - fn process_replays_combine_items( - &self, - state: &mut ProcessEnvelopeState, - ) -> Result<(), ProcessingError> { - let project_state = &state.project_state; - let combined_envelope_items = - project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); - - if combined_envelope_items { - // If this flag is enabled, combine both items into a single item, - // and remove the original items. - // The combined Item's payload is a MsgPack map with the keys - // "replay_event" and "replay_recording". - // The values are the original payloads of the items. - let envelope = &mut state.envelope_mut(); - if let Some(replay_event_item) = - envelope.take_item_by(|item| item.ty() == &ItemType::ReplayEvent) - { - if let Some(replay_recording_item) = - envelope.take_item_by(|item| item.ty() == &ItemType::ReplayRecording) - { - let mut data = Vec::new(); - let mut combined_item_payload = BTreeMap::new(); - - combined_item_payload - .insert("replay_event", replay_event_item.payload().to_vec()); - combined_item_payload - .insert("replay_recording", replay_recording_item.payload().to_vec()); - - if let Err(e) = rmp_serde::encode::write(&mut data, &combined_item_payload) { - relay_log::error!( - "failed to serialize combined replay event and recording: {}", - e - ); - // TODO: what to do here? Drop + emit outcome? 
- } - - let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); - - combined_item.set_payload(ContentType::MsgPack, data); - envelope.add_item(combined_item); - } else { - envelope.add_item(replay_event_item) - } - } - } - Ok(()) - } - - /// Validates, normalizes, and scrubs PII from a replay event. - fn process_replay_event( - &self, - payload: &Bytes, - config: &ProjectConfig, - client_ip: Option, - user_agent: &RawUserAgentInfo<&str>, - ) -> Result, ReplayError> { - let mut replay = - Annotated::::from_json_bytes(payload).map_err(ReplayError::CouldNotParse)?; - - if let Some(replay_value) = replay.value_mut() { - replay_value.validate()?; - replay_value.normalize(client_ip, user_agent); - } else { - return Err(ReplayError::NoContent); - } - - if let Some(ref config) = config.pii_config { - let mut processor = PiiProcessor::new(config.compiled()); - process_value(&mut replay, &mut processor, ProcessingState::root()) - .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; - } - - let pii_config = config - .datascrubbing_settings - .pii_config() - .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; - if let Some(config) = pii_config { - let mut processor = PiiProcessor::new(config.compiled()); - process_value(&mut replay, &mut processor, ProcessingState::root()) - .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; - } - - Ok(replay) - } - - /// Creates and initializes the processing state. - /// - /// This applies defaults to the envelope and initializes empty rate limits. - fn prepare_state( - &self, - message: ProcessEnvelope, - ) -> Result { - let ProcessEnvelope { - envelope: mut managed_envelope, - project_state, - sampling_project_state, - } = message; - - let envelope = managed_envelope.envelope_mut(); - - // Set the event retention. Effectively, this value will only be available in processing - // mode when the full project config is queried from the upstream. 
- if let Some(retention) = project_state.config.event_retention { - envelope.set_retention(retention); - } - - // Prefer the project's project ID, and fall back to the stated project id from the - // envelope. The project ID is available in all modes, other than in proxy mode, where - // envelopes for unknown projects are forwarded blindly. - // - // Neither ID can be available in proxy mode on the /store/ endpoint. This is not supported, - // since we cannot process an envelope without project ID, so drop it. - let project_id = match project_state - .project_id - .or_else(|| envelope.meta().project_id()) - { - Some(project_id) => project_id, - None => { - managed_envelope.reject(Outcome::Invalid(DiscardReason::Internal)); - return Err(ProcessingError::MissingProjectId); - } - }; - - // Ensure the project ID is updated to the stored instance for this project cache. This can - // differ in two cases: - // 1. The envelope was sent to the legacy `/store/` endpoint without a project ID. - // 2. The DSN was moved and the envelope sent to the old project ID. - envelope.meta_mut().set_project_id(project_id); - - Ok(ProcessEnvelopeState { - event: Annotated::empty(), - transaction_metrics_extracted: false, - metrics: Metrics::default(), - sample_rates: None, - sampling_result: SamplingResult::Keep, - extracted_metrics: Default::default(), - project_state, - sampling_project_state, - project_id, - managed_envelope, - }) - } - - /// Expands Unreal 4 items inside an envelope. - /// - /// If the envelope does NOT contain an `UnrealReport` item, it doesn't do anything. If the - /// envelope contains an `UnrealReport` item, it removes it from the envelope and inserts new - /// items for each of its contents. - /// - /// The envelope may be dropped if it exceeds size limits after decompression. Particularly, - /// this includes cases where a single attachment file exceeds the maximum file size. This is in - /// line with the behavior of the envelope endpoint. 
- /// - /// After this, [`EnvelopeProcessorService`] should be able to process the envelope the same - /// way it processes any other envelopes. - #[cfg(feature = "processing")] - fn expand_unreal(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let envelope = &mut state.envelope_mut(); - - if let Some(item) = envelope.take_item_by(|item| item.ty() == &ItemType::UnrealReport) { - utils::expand_unreal_envelope(item, envelope, &self.config)?; - } - - Ok(()) - } - - fn event_from_json_payload( - &self, - item: Item, - event_type: Option, - ) -> Result { - let mut event = Annotated::::from_json_bytes(&item.payload()) - .map_err(ProcessingError::InvalidJson)?; - - if let Some(event_value) = event.value_mut() { - event_value.ty.set_value(event_type); - } - - Ok((event, item.len())) - } - - fn event_from_security_report( - &self, - item: Item, - meta: &RequestMeta, - ) -> Result { - let len = item.len(); - let mut event = Event::default(); - - let data = &item.payload(); - let report_type = SecurityReportType::from_json(data) - .map_err(ProcessingError::InvalidJson)? 
- .ok_or(ProcessingError::InvalidSecurityType)?; - - let apply_result = match report_type { - SecurityReportType::Csp => Csp::apply_to_event(data, &mut event), - SecurityReportType::ExpectCt => ExpectCt::apply_to_event(data, &mut event), - SecurityReportType::ExpectStaple => ExpectStaple::apply_to_event(data, &mut event), - SecurityReportType::Hpkp => Hpkp::apply_to_event(data, &mut event), - }; - - if let Err(json_error) = apply_result { - // logged in extract_event - relay_log::configure_scope(|scope| { - scope.set_extra("payload", String::from_utf8_lossy(data).into()); - }); - - return Err(ProcessingError::InvalidSecurityReport(json_error)); - } - - if let Some(release) = item.get_header("sentry_release").and_then(Value::as_str) { - event.release = Annotated::from(LenientString(release.to_owned())); - } - - if let Some(env) = item - .get_header("sentry_environment") - .and_then(Value::as_str) - { - event.environment = Annotated::from(env.to_owned()); - } - - if let Some(origin) = meta.origin() { - event - .request - .get_or_insert_with(Default::default) - .headers - .get_or_insert_with(Default::default) - .insert("Origin".into(), Annotated::new(origin.to_string().into())); - } - - // Explicitly set the event type. This is required so that a `Security` item can be created - // instead of a regular `Event` item. 
- event.ty = Annotated::new(match report_type { - SecurityReportType::Csp => EventType::Csp, - SecurityReportType::ExpectCt => EventType::ExpectCt, - SecurityReportType::ExpectStaple => EventType::ExpectStaple, - SecurityReportType::Hpkp => EventType::Hpkp, - }); - - Ok((Annotated::new(event), len)) - } - - fn merge_formdata(&self, target: &mut SerdeValue, item: Item) { - let payload = item.payload(); - let mut aggregator = ChunkedFormDataAggregator::new(); - - for entry in FormDataIter::new(&payload) { - if entry.key() == "sentry" || entry.key().starts_with("sentry___") { - // Custom clients can submit longer payloads and should JSON encode event data into - // the optional `sentry` field or a `sentry___` field. - match serde_json::from_str(entry.value()) { - Ok(event) => utils::merge_values(target, event), - Err(_) => relay_log::debug!("invalid json event payload in sentry form field"), - } - } else if let Some(index) = utils::get_sentry_chunk_index(entry.key(), "sentry__") { - // Electron SDK splits up long payloads into chunks starting at sentry__1 with an - // incrementing counter. Assemble these chunks here and then decode them below. - aggregator.insert(index, entry.value()); - } else if let Some(keys) = utils::get_sentry_entry_indexes(entry.key()) { - // Try to parse the nested form syntax `sentry[key][key]` This is required for the - // Breakpad client library, which only supports string values of up to 64 - // characters. - utils::update_nested_value(target, &keys, entry.value()); - } else { - // Merge additional form fields from the request with `extra` data from the event - // payload and set defaults for processing. This is sent by clients like Breakpad or - // Crashpad. 
- utils::update_nested_value(target, &["extra", entry.key()], entry.value()); - } - } - - if !aggregator.is_empty() { - match serde_json::from_str(&aggregator.join()) { - Ok(event) => utils::merge_values(target, event), - Err(_) => relay_log::debug!("invalid json event payload in sentry__* form fields"), - } - } - } - - fn extract_attached_event( - config: &Config, - item: Option, - ) -> Result, ProcessingError> { - let item = match item { - Some(item) if !item.is_empty() => item, - _ => return Ok(Annotated::new(Event::default())), - }; - - // Protect against blowing up during deserialization. Attachments can have a significantly - // larger size than regular events and may cause significant processing delays. - if item.len() > config.max_event_size() { - return Err(ProcessingError::PayloadTooLarge); - } - - let payload = item.payload(); - let deserializer = &mut rmp_serde::Deserializer::from_read_ref(payload.as_ref()); - Annotated::deserialize_with_meta(deserializer).map_err(ProcessingError::InvalidMsgpack) - } - - fn parse_msgpack_breadcrumbs( - config: &Config, - item: Option, - ) -> Result, ProcessingError> { - let mut breadcrumbs = Array::new(); - let item = match item { - Some(item) if !item.is_empty() => item, - _ => return Ok(breadcrumbs), - }; - - // Validate that we do not exceed the maximum breadcrumb payload length. Breadcrumbs are - // truncated to a maximum of 100 in event normalization, but this is to protect us from - // blowing up during deserialization. As approximation, we use the maximum event payload - // size as bound, which is roughly in the right ballpark. 
- if item.len() > config.max_event_size() { - return Err(ProcessingError::PayloadTooLarge); - } - - let payload = item.payload(); - let mut deserializer = rmp_serde::Deserializer::new(payload.as_ref()); - - while !deserializer.get_ref().is_empty() { - let breadcrumb = Annotated::deserialize_with_meta(&mut deserializer)?; - breadcrumbs.push(breadcrumb); - } - - Ok(breadcrumbs) - } - - fn event_from_attachments( - config: &Config, - event_item: Option, - breadcrumbs_item1: Option, - breadcrumbs_item2: Option, - ) -> Result { - let len = event_item.as_ref().map_or(0, |item| item.len()) - + breadcrumbs_item1.as_ref().map_or(0, |item| item.len()) - + breadcrumbs_item2.as_ref().map_or(0, |item| item.len()); - - let mut event = Self::extract_attached_event(config, event_item)?; - let mut breadcrumbs1 = Self::parse_msgpack_breadcrumbs(config, breadcrumbs_item1)?; - let mut breadcrumbs2 = Self::parse_msgpack_breadcrumbs(config, breadcrumbs_item2)?; - - let timestamp1 = breadcrumbs1 - .iter() - .rev() - .find_map(|breadcrumb| breadcrumb.value().and_then(|b| b.timestamp.value())); - - let timestamp2 = breadcrumbs2 - .iter() - .rev() - .find_map(|breadcrumb| breadcrumb.value().and_then(|b| b.timestamp.value())); - - // Sort breadcrumbs by date. We presume that last timestamp from each row gives the - // relative sequence of the whole sequence, i.e., we don't need to splice the sequences - // to get the breadrumbs sorted. - if timestamp1 > timestamp2 { - std::mem::swap(&mut breadcrumbs1, &mut breadcrumbs2); - } - - // Limit the total length of the breadcrumbs. We presume that if we have both - // breadcrumbs with items one contains the maximum number of breadcrumbs allowed. 
- let max_length = std::cmp::max(breadcrumbs1.len(), breadcrumbs2.len()); - - breadcrumbs1.extend(breadcrumbs2); - - if breadcrumbs1.len() > max_length { - // Keep only the last max_length elements from the vectors - breadcrumbs1.drain(0..(breadcrumbs1.len() - max_length)); - } - - if !breadcrumbs1.is_empty() { - event.get_or_insert_with(Event::default).breadcrumbs = Annotated::new(Values { - values: Annotated::new(breadcrumbs1), - other: Object::default(), - }); - } - - Ok((event, len)) - } - - /// Checks for duplicate items in an envelope. - /// - /// An item is considered duplicate if it was not removed by sanitation in `process_event` and - /// `extract_event`. This partially depends on the `processing_enabled` flag. - fn is_duplicate(&self, item: &Item) -> bool { - match item.ty() { - // These should always be removed by `extract_event`: - ItemType::Event => true, - ItemType::Transaction => true, - ItemType::Security => true, - ItemType::FormData => true, - ItemType::RawSecurity => true, - - // These should be removed conditionally: - ItemType::UnrealReport => self.config.processing_enabled(), - - // These may be forwarded to upstream / store: - ItemType::Attachment => false, - ItemType::UserReport => false, - - // Aggregate data is never considered as part of deduplication - ItemType::Session => false, - ItemType::Sessions => false, - ItemType::Metrics => false, - ItemType::MetricBuckets => false, - ItemType::ClientReport => false, - ItemType::Profile => false, - ItemType::ReplayEvent => false, - ItemType::ReplayRecording => false, - ItemType::CombinedReplayEventAndRecording => false, - ItemType::CheckIn => false, - - // Without knowing more, `Unknown` items are allowed to be repeated - ItemType::Unknown(_) => false, - } - } - - /// Extracts the primary event payload from an envelope. - /// - /// The event is obtained from only one source in the following precedence: - /// 1. An explicit event item. This is also the case for JSON uploads. - /// 2. 
A security report item. - /// 3. Attachments `__sentry-event` and `__sentry-breadcrumb1/2`. - /// 4. A multipart form data body. - /// 5. If none match, `Annotated::empty()`. - fn extract_event(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let envelope = &mut state.envelope_mut(); - - // Remove all items first, and then process them. After this function returns, only - // attachments can remain in the envelope. The event will be added again at the end of - // `process_event`. - let event_item = envelope.take_item_by(|item| item.ty() == &ItemType::Event); - let transaction_item = envelope.take_item_by(|item| item.ty() == &ItemType::Transaction); - let security_item = envelope.take_item_by(|item| item.ty() == &ItemType::Security); - let raw_security_item = envelope.take_item_by(|item| item.ty() == &ItemType::RawSecurity); - let form_item = envelope.take_item_by(|item| item.ty() == &ItemType::FormData); - let attachment_item = envelope - .take_item_by(|item| item.attachment_type() == Some(&AttachmentType::EventPayload)); - let breadcrumbs1 = envelope - .take_item_by(|item| item.attachment_type() == Some(&AttachmentType::Breadcrumbs)); - let breadcrumbs2 = envelope - .take_item_by(|item| item.attachment_type() == Some(&AttachmentType::Breadcrumbs)); - - // Event items can never occur twice in an envelope. - if let Some(duplicate) = envelope.get_item_by(|item| self.is_duplicate(item)) { - return Err(ProcessingError::DuplicateItem(duplicate.ty().clone())); - } - - let mut sample_rates = None; - let (event, event_len) = if let Some(mut item) = event_item.or(security_item) { - relay_log::trace!("processing json event"); - sample_rates = item.take_sample_rates(); - metric!(timer(RelayTimers::EventProcessingDeserialize), { - // Event items can never include transactions, so retain the event type and let - // inference deal with this during store normalization. - self.event_from_json_payload(item, None)? 
- }) - } else if let Some(mut item) = transaction_item { - relay_log::trace!("processing json transaction"); - sample_rates = item.take_sample_rates(); - state.transaction_metrics_extracted = item.metrics_extracted(); - metric!(timer(RelayTimers::EventProcessingDeserialize), { - // Transaction items can only contain transaction events. Force the event type to - // hint to normalization that we're dealing with a transaction now. - self.event_from_json_payload(item, Some(EventType::Transaction))? - }) - } else if let Some(mut item) = raw_security_item { - relay_log::trace!("processing security report"); - sample_rates = item.take_sample_rates(); - self.event_from_security_report(item, envelope.meta()) - .map_err(|error| { - relay_log::error!( - error = &error as &dyn Error, - "failed to extract security report" - ); - error - })? - } else if attachment_item.is_some() || breadcrumbs1.is_some() || breadcrumbs2.is_some() { - relay_log::trace!("extracting attached event data"); - Self::event_from_attachments(&self.config, attachment_item, breadcrumbs1, breadcrumbs2)? - } else if let Some(item) = form_item { - relay_log::trace!("extracting form data"); - let len = item.len(); - - let mut value = SerdeValue::Object(Default::default()); - self.merge_formdata(&mut value, item); - let event = Annotated::deserialize_with_meta(value).unwrap_or_default(); - - (event, len) - } else { - relay_log::trace!("no event in envelope"); - (Annotated::empty(), 0) - }; - - state.event = event; - state.sample_rates = sample_rates; - state.metrics.bytes_ingested_event = Annotated::new(event_len as u64); - - Ok(()) - } - - /// Extracts event information from an unreal context. - /// - /// If the event does not contain an unreal context, this function does not perform any action. - /// If there was no event payload prior to this function, it is created. 
- #[cfg(feature = "processing")] - fn process_unreal(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - utils::process_unreal_envelope(&mut state.event, state.managed_envelope.envelope_mut()) - .map_err(ProcessingError::InvalidUnrealReport) - } - - /// Adds processing placeholders for special attachments. - /// - /// If special attachments are present in the envelope, this adds placeholder payloads to the - /// event. This indicates to the pipeline that the event needs special processing. - /// - /// If the event payload was empty before, it is created. - #[cfg(feature = "processing")] - fn create_placeholders(&self, state: &mut ProcessEnvelopeState) { - let envelope = state.managed_envelope.envelope(); - let minidump_attachment = - envelope.get_item_by(|item| item.attachment_type() == Some(&AttachmentType::Minidump)); - let apple_crash_report_attachment = envelope - .get_item_by(|item| item.attachment_type() == Some(&AttachmentType::AppleCrashReport)); - - if let Some(item) = minidump_attachment { - let event = state.event.get_or_insert_with(Event::default); - state.metrics.bytes_ingested_event_minidump = Annotated::new(item.len() as u64); - utils::process_minidump(event, &item.payload()); - } else if let Some(item) = apple_crash_report_attachment { - let event = state.event.get_or_insert_with(Event::default); - state.metrics.bytes_ingested_event_applecrashreport = Annotated::new(item.len() as u64); - utils::process_apple_crash_report(event, &item.payload()); - } - } - - fn finalize_event(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let is_transaction = state.event_type() == Some(EventType::Transaction); - let envelope = state.managed_envelope.envelope_mut(); - - let event = match state.event.value_mut() { - Some(event) => event, - None if !self.config.processing_enabled() => return Ok(()), - None => return Err(ProcessingError::NoEventPayload), - }; - - if !self.config.processing_enabled() { - static 
MY_VERSION_STRING: OnceCell = OnceCell::new(); - let my_version = MY_VERSION_STRING.get_or_init(|| RelayVersion::current().to_string()); - - event - .ingest_path - .get_or_insert_with(Default::default) - .push(Annotated::new(RelayInfo { - version: Annotated::new(my_version.clone()), - public_key: self - .config - .public_key() - .map_or(Annotated::empty(), |pk| Annotated::new(pk.to_string())), - other: Default::default(), - })); - } - - // Event id is set statically in the ingest path. - let event_id = envelope.event_id().unwrap_or_default(); - debug_assert!(!event_id.is_nil()); - - // Ensure that the event id in the payload is consistent with the envelope. If an event - // id was ingested, this will already be the case. Otherwise, this will insert a new - // event id. To be defensive, we always overwrite to ensure consistency. - event.id = Annotated::new(event_id); - - // In processing mode, also write metrics into the event. Most metrics have already been - // collected at this state, except for the combined size of all attachments. 
- if self.config.processing_enabled() { - let mut metrics = std::mem::take(&mut state.metrics); - - let attachment_size = envelope - .items() - .filter(|item| item.attachment_type() == Some(&AttachmentType::Attachment)) - .map(|item| item.len() as u64) - .sum::(); - - if attachment_size > 0 { - metrics.bytes_ingested_event_attachment = Annotated::new(attachment_size); - } - - let sample_rates = state - .sample_rates - .take() - .and_then(|value| Array::from_value(Annotated::new(value)).into_value()); - - if let Some(rates) = sample_rates { - metrics - .sample_rates - .get_or_insert_with(Array::new) - .extend(rates) - } - - event._metrics = Annotated::new(metrics); - - if event.ty.value() == Some(&EventType::Transaction) { - let source = event.get_transaction_source(); - - metric!( - counter(RelayCounters::EventTransactionSource) += 1, - source = &source.to_string(), - sdk = envelope.meta().client_name().unwrap_or("proprietary"), - platform = event.platform.as_str().unwrap_or("other"), - ); - - let span_count = event.spans.value().map(Vec::len).unwrap_or(0) as u64; - metric!( - histogram(RelayHistograms::EventSpans) = span_count, - sdk = envelope.meta().client_name().unwrap_or("proprietary"), - platform = event.platform.as_str().unwrap_or("other"), - ); - - let otel_context = event - .contexts - .value() - .and_then(|contexts| contexts.get("otel")) - .and_then(Annotated::value); - - if otel_context.is_some() { - metric!( - counter(RelayCounters::OpenTelemetryEvent) += 1, - sdk = envelope.meta().client_name().unwrap_or("proprietary"), - platform = event.platform.as_str().unwrap_or("other"), - ); - } - } - } - - // TODO: Temporary workaround before processing. Experimental SDKs relied on a buggy - // clock drift correction that assumes the event timestamp is the sent_at time. This - // should be removed as soon as legacy ingestion has been removed. 
- let sent_at = match envelope.sent_at() { - Some(sent_at) => Some(sent_at), - None if is_transaction => event.timestamp.value().copied().map(Timestamp::into_inner), - None => None, - }; - - let mut processor = ClockDriftProcessor::new(sent_at, state.managed_envelope.received_at()) - .at_least(MINIMUM_CLOCK_DRIFT); - process_value(&mut state.event, &mut processor, ProcessingState::root()) - .map_err(|_| ProcessingError::InvalidTransaction)?; - - // Log timestamp delays for all events after clock drift correction. This happens before - // store processing, which could modify the timestamp if it exceeds a threshold. We are - // interested in the actual delay before this correction. - if let Some(timestamp) = state.event.value().and_then(|e| e.timestamp.value()) { - let event_delay = state.managed_envelope.received_at() - timestamp.into_inner(); - if event_delay > SignedDuration::minutes(1) { - let category = state.event_category().unwrap_or(DataCategory::Unknown); - metric!( - timer(RelayTimers::TimestampDelay) = event_delay.to_std().unwrap(), - category = category.name(), - ); - } - } - - Ok(()) - } - - #[cfg(feature = "processing")] - fn store_process_event(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let ProcessEnvelopeState { - ref mut event, - ref project_state, - ref managed_envelope, - .. 
- } = *state; - - let key_id = project_state - .get_public_key_config() - .and_then(|k| Some(k.numeric_id?.to_string())); - - let envelope = state.managed_envelope.envelope(); - - if key_id.is_none() { - relay_log::error!( - "project state for key {} is missing key id", - envelope.meta().public_key() - ); - } - - let store_config = StoreConfig { - project_id: Some(state.project_id.value()), - client_ip: envelope.meta().client_addr().map(IpAddr::from), - client: envelope.meta().client().map(str::to_owned), - key_id, - protocol_version: Some(envelope.meta().version().to_string()), - grouping_config: project_state.config.grouping_config.clone(), - user_agent: envelope.meta().user_agent().map(str::to_owned), - max_secs_in_future: Some(self.config.max_secs_in_future()), - max_secs_in_past: Some(self.config.max_secs_in_past()), - enable_trimming: Some(true), - is_renormalize: Some(false), - remove_other: Some(true), - normalize_user_agent: Some(true), - sent_at: envelope.sent_at(), - received_at: Some(managed_envelope.received_at()), - breakdowns: project_state.config.breakdowns_v2.clone(), - span_attributes: project_state.config.span_attributes.clone(), - client_sample_rate: envelope.dsc().and_then(|ctx| ctx.sample_rate), - replay_id: envelope.dsc().and_then(|ctx| ctx.replay_id), - client_hints: envelope.meta().client_hints().to_owned(), - }; - - let mut store_processor = StoreProcessor::new(store_config, self.geoip_lookup.as_ref()); - metric!(timer(RelayTimers::EventProcessingProcess), { - process_value(event, &mut store_processor, ProcessingState::root()) - .map_err(|_| ProcessingError::InvalidTransaction)?; - if has_unprintable_fields(event) { - metric!(counter(RelayCounters::EventCorrupted) += 1); - } - }); - - Ok(()) - } - - /// Ensures there is a valid dynamic sampling context and corresponding project state. - /// - /// The dynamic sampling context (DSC) specifies the project_key of the project that initiated - /// the trace. 
That project state should have been loaded previously by the project cache and is - /// available on the `ProcessEnvelopeState`. Under these conditions, this cannot happen: - /// - /// - There is no DSC in the envelope headers. This occurs with older or third-party SDKs. - /// - The project key does not exist. This can happen if the project key was disabled, the - /// project removed, or in rare cases when a project from another Sentry instance is referred - /// to. - /// - The project key refers to a project from another organization. In this case the project - /// cache does not resolve the state and instead leaves it blank. - /// - The project state could not be fetched. This is a runtime error, but in this case Relay - /// should fall back to the next-best sampling rule set. - /// - /// In all of the above cases, this function will compute a new DSC using information from the - /// event payload, similar to how SDKs do this. The `sampling_project_state` is also switched to - /// the main project state. - /// - /// If there is no transaction event in the envelope, this function will do nothing. - fn normalize_dsc(&self, state: &mut ProcessEnvelopeState) { - if state.envelope().dsc().is_some() && state.sampling_project_state.is_some() { - return; - } - - // The DSC can only be computed if there's a transaction event. Note that `from_transaction` - // below already checks for the event type. - let Some(event) = state.event.value() else { return }; - let Some(key_config) = state.project_state.get_public_key_config() else { return }; - - if let Some(dsc) = DynamicSamplingContext::from_transaction(key_config.public_key, event) { - state.envelope_mut().set_dsc(dsc); - state.sampling_project_state = Some(state.project_state.clone()); - } - } - - fn filter_event(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let event = match state.event.value_mut() { - Some(event) => event, - // Some events are created by processing relays (e.g. 
unreal), so they do not yet - // exist at this point in non-processing relays. - None => return Ok(()), - }; - - let client_ip = state.managed_envelope.envelope().meta().client_addr(); - let filter_settings = &state.project_state.config.filter_settings; - - metric!(timer(RelayTimers::EventProcessingFiltering), { - relay_filter::should_filter(event, client_ip, filter_settings).map_err(|err| { - state.managed_envelope.reject(Outcome::Filtered(err)); - ProcessingError::EventFiltered(err) - }) - }) - } - - #[cfg(feature = "processing")] - fn enforce_quotas(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let rate_limiter = match self.rate_limiter.as_ref() { - Some(rate_limiter) => rate_limiter, - None => return Ok(()), - }; - - let project_state = &state.project_state; - let quotas = project_state.config.quotas.as_slice(); - if quotas.is_empty() { - return Ok(()); - } - - let event_category = state.event_category(); - - // When invoking the rate limiter, capture if the event item has been rate limited to also - // remove it from the processing state eventually. - let mut envelope_limiter = - EnvelopeLimiter::new(Some(&project_state.config), |item_scope, quantity| { - rate_limiter.is_rate_limited(quotas, item_scope, quantity, false) - }); - - // Tell the envelope limiter about the event, since it has been removed from the Envelope at - // this stage in processing. - if let Some(category) = event_category { - envelope_limiter.assume_event(category, state.transaction_metrics_extracted); - } - - let scoping = state.managed_envelope.scoping(); - let (enforcement, limits) = metric!(timer(RelayTimers::EventProcessingRateLimiting), { - envelope_limiter.enforce(state.managed_envelope.envelope_mut(), &scoping)? 
- }); - - if limits.is_limited() { - self.project_cache - .send(UpdateRateLimits::new(scoping.project_key, limits)); - } - - if enforcement.event_active() { - state.remove_event(); - debug_assert!(state.envelope().is_empty()); - } - - enforcement.track_outcomes( - state.envelope(), - &state.managed_envelope.scoping(), - self.outcome_aggregator.clone(), - ); - - Ok(()) - } - - /// Extract metrics for transaction events with breakdowns and measurements. - fn extract_transaction_metrics( - &self, - state: &mut ProcessEnvelopeState, - ) -> Result<(), ProcessingError> { - if state.transaction_metrics_extracted { - // Nothing to do here. - return Ok(()); - } - - let project_config = state.project_state.config(); - let extraction_config = match project_config.transaction_metrics { - Some(ErrorBoundary::Ok(ref config)) => config, - _ => return Ok(()), - }; - - if !extraction_config.is_enabled() { - return Ok(()); - } - - let extract_spans_metrics = project_config - .features - .contains(&Feature::SpanMetricsExtraction); - - let transaction_from_dsc = state - .managed_envelope - .envelope() - .dsc() - .and_then(|dsc| dsc.transaction.as_deref()); - - if let Some(event) = state.event.value_mut() { - let result; - metric!( - timer(RelayTimers::TransactionMetricsExtraction), - extracted_anything = &result.unwrap_or(false).to_string(), - { - // Actual logic outsourced for unit tests - result = extract_transaction_metrics( - self.config.aggregator_config(), - extraction_config, - &project_config.metric_conditional_tagging, - extract_spans_metrics, - event, - transaction_from_dsc, - &state.sampling_result, - &mut state.extracted_metrics.project_metrics, - &mut state.extracted_metrics.sampling_metrics, - ); - } - ); - - result?; - - state.transaction_metrics_extracted = true; - state.managed_envelope.set_event_metrics_extracted(); - } - - Ok(()) - } - - /// Apply data privacy rules to the event payload. 
- /// - /// This uses both the general `datascrubbing_settings`, as well as the the PII rules. - fn scrub_event(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let event = &mut state.event; - let config = &state.project_state.config; - - metric!(timer(RelayTimers::EventProcessingPii), { - if let Some(ref config) = config.pii_config { - let mut processor = PiiProcessor::new(config.compiled()); - process_value(event, &mut processor, ProcessingState::root())?; - } - let pii_config = config - .datascrubbing_settings - .pii_config() - .map_err(|e| ProcessingError::PiiConfigError(e.clone()))?; - if let Some(config) = pii_config { - let mut processor = PiiProcessor::new(config.compiled()); - process_value(event, &mut processor, ProcessingState::root())?; - } - }); - - Ok(()) - } - - /// Apply data privacy rules to attachments in the envelope. - /// - /// This only applies the new PII rules that explicitly select `ValueType::Binary` or one of the - /// attachment types. When special attachments are detected, these are scrubbed with custom - /// logic; otherwise the entire attachment is treated as a single binary blob. - fn scrub_attachments(&self, state: &mut ProcessEnvelopeState) { - let envelope = state.managed_envelope.envelope_mut(); - if let Some(ref config) = state.project_state.config.pii_config { - let minidump = envelope - .get_item_by_mut(|item| item.attachment_type() == Some(&AttachmentType::Minidump)); - - if let Some(item) = minidump { - let filename = item.filename().unwrap_or_default(); - let mut payload = item.payload().to_vec(); - - let processor = PiiAttachmentsProcessor::new(config.compiled()); - - // Minidump scrubbing can fail if the minidump cannot be parsed. In this case, we - // must be conservative and treat it as a plain attachment. Under extreme - // conditions, this could destroy stack memory. 
- let start = Instant::now(); - match processor.scrub_minidump(filename, &mut payload) { - Ok(modified) => { - metric!( - timer(RelayTimers::MinidumpScrubbing) = start.elapsed(), - status = if modified { "ok" } else { "n/a" }, - ); - } - Err(scrub_error) => { - metric!( - timer(RelayTimers::MinidumpScrubbing) = start.elapsed(), - status = "error" - ); - relay_log::warn!( - error = &scrub_error as &dyn Error, - "failed to scrub minidump", - ); - metric!(timer(RelayTimers::AttachmentScrubbing), { - processor.scrub_attachment(filename, &mut payload); - }) - } - } - - let content_type = item - .content_type() - .unwrap_or(&ContentType::Minidump) - .clone(); - - item.set_payload(content_type, payload); - } - } - } - - fn serialize_event(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let data = metric!(timer(RelayTimers::EventProcessingSerialization), { - state - .event - .to_json() - .map_err(ProcessingError::SerializeFailed)? - }); - - let event_type = state.event_type().unwrap_or_default(); - let mut event_item = Item::new(ItemType::from_event_type(event_type)); - event_item.set_payload(ContentType::Json, data); - - // If transaction metrics were extracted, set the corresponding item header - event_item.set_metrics_extracted(state.transaction_metrics_extracted); - - // If there are sample rates, write them back to the envelope. In processing mode, sample - // rates have been removed from the state and burnt into the event via `finalize_event`. - if let Some(sample_rates) = state.sample_rates.take() { - event_item.set_sample_rates(sample_rates); - } - - state.envelope_mut().add_item(event_item); - - Ok(()) - } - - /// Computes the sampling decision on the incoming event - fn run_dynamic_sampling(&self, state: &mut ProcessEnvelopeState) { - // Running dynamic sampling involves either: - // - Tagging whether an incoming error has a sampled trace connected to it. - // - Computing the actual sampling decision on an incoming transaction. 
- match state.event_type().unwrap_or_default() { - EventType::Default | EventType::Error => { - self.tag_error_with_sampling_decision(state); - } - EventType::Transaction => { - self.compute_sampling_decision(state); - } - _ => {} - } - } - - /// Computes the sampling decision on the incoming transaction. - fn compute_sampling_decision(&self, state: &mut ProcessEnvelopeState) { - state.sampling_result = utils::get_sampling_result( - self.config.processing_enabled(), - Some(&state.project_state), - state.sampling_project_state.as_deref(), - state.envelope().dsc(), - state.event.value(), - state.envelope().meta().client_addr(), - ); - } - - /// Runs dynamic sampling on an incoming error and tags it in case of successful sampling - /// decision. - /// - /// This execution of dynamic sampling is technically a "simulation" since we will use the result - /// only for tagging errors and not for actually sampling incoming events. - fn tag_error_with_sampling_decision(&self, state: &mut ProcessEnvelopeState) { - // In case there is no incoming event we can't tag anything, thus we early return. - if state.event.is_empty() { - return; - } - - // We want to run dynamic sampling only if we have a root project state and a dynamic - // sampling context. - // - // In reality the dynamic sampling logic supports optional root state and dsc but it will - // return keep. In our case having a keep in case of none root state and dsc will be - // a problem, since in reality we can't infer anything without trace metadata. 
- let sampling_result = if let (Some(root_project_state), Some(dsc)) = ( - state.sampling_project_state.as_deref(), - state.envelope().dsc(), - ) { - utils::get_sampling_result( - self.config.processing_enabled(), - None, - Some(root_project_state), - Some(dsc), - None, - state.envelope().meta().client_addr(), - ) - } else { - return; - }; - - let Some(event) = state.event.value_mut() else { - return; - }; - - // In case the sampling result is positive, we assume that all the transactions - // that have this DSC will be sampled and thus we mark the error as "having - // a full trace". - // In case we have no contexts object, we have to create it. - let contexts = event.contexts.get_or_insert_with(Contexts::new); - - // We want to get the specific trace context, or we want to create it in case - // it is not there. - let context = - contexts.get_or_insert_with(TraceContext::default_key(), || Trace(Box::default())); - - // We want to mutate the sampled after the "fake" sampling has been performed. - // - // It is important to note that tagging only occurs if there is a dsc and root - // project state. - if let Trace(boxed_context) = context { - // We want to update `sampled` only if it was not set, since if we don't check this - // we will end up overriding the value set by downstream Relays and this will lead - // to more complex debugging in case of problems. - if boxed_context.sampled.is_empty() { - let sampled = match sampling_result { - SamplingResult::Keep => true, - SamplingResult::Drop(_) => false, - }; - relay_log::trace!("tagging error with `sampled = {}` flag", sampled); - boxed_context.sampled = Annotated::new(sampled); - } - } - } - - /// Apply the dynamic sampling decision from `compute_sampling_decision`. - fn sample_envelope(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - match std::mem::take(&mut state.sampling_result) { - // We assume that sampling is only supposed to work on transactions. 
- SamplingResult::Drop(rule_ids) - if state.event_type() == Some(EventType::Transaction) => - { - state - .managed_envelope - .reject(Outcome::FilteredSampling(rule_ids.clone())); - - Err(ProcessingError::Sampled(rule_ids)) - } - _ => Ok(()), - } - } - - fn light_normalize_event( - &self, - state: &mut ProcessEnvelopeState, - ) -> Result<(), ProcessingError> { - let request_meta = state.managed_envelope.envelope().meta(); - let client_ipaddr = request_meta.client_addr().map(IpAddr::from); - - let light_normalize_spans = state - .project_state - .has_feature(Feature::SpanMetricsExtraction); - - log_transaction_name_metrics(&mut state.event, |event| { - let config = LightNormalizationConfig { - client_ip: client_ipaddr.as_ref(), - user_agent: RawUserAgentInfo { - user_agent: request_meta.user_agent(), - client_hints: request_meta.client_hints().as_deref(), - }, - received_at: Some(state.managed_envelope.received_at()), - max_secs_in_past: Some(self.config.max_secs_in_past()), - max_secs_in_future: Some(self.config.max_secs_in_future()), - max_name_and_unit_len: Some( - self.config - .aggregator_config() - .max_name_length - .saturating_sub(MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD), - ), - measurements_config: state.project_state.config.measurements.as_ref(), - breakdowns_config: state.project_state.config.breakdowns_v2.as_ref(), - normalize_user_agent: Some(true), - transaction_name_config: TransactionNameConfig { - rules: &state.project_state.config.tx_name_rules, - ready: state.project_state.config.tx_name_ready, - }, - device_class_synthesis_config: state - .project_state - .has_feature(Feature::DeviceClassSynthesis), - scrub_span_descriptions: state - .project_state - .has_feature(Feature::SpanMetricsExtraction), - is_renormalize: false, - light_normalize_spans, - }; - - metric!(timer(RelayTimers::EventProcessingLightNormalization), { - relay_general::store::light_normalize_event(event, config) - .map_err(|_| ProcessingError::InvalidTransaction) - }) - })?; - 
- Ok(()) - } - - fn process_state(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - macro_rules! if_processing { - ($if_true:block) => { - #[cfg(feature = "processing")] { - if self.config.processing_enabled() $if_true - } - }; - } - - self.process_sessions(state); - self.process_client_reports(state); - self.process_user_reports(state); - self.process_replays(state)?; - self.process_replays_combine_items(state)?; - - self.filter_profiles(state); - - // After filtering, we need to update the envelope summary: - state.managed_envelope.update(); - - if state.creates_event() { - // Some envelopes only create events in processing relays; for example, unreal events. - // This makes it possible to get in this code block while not really having an event in - // the envelope. - - if_processing!({ - self.expand_unreal(state)?; - }); - - self.extract_event(state)?; - - if_processing!({ - self.process_unreal(state)?; - self.create_placeholders(state); - }); - - self.finalize_event(state)?; - self.light_normalize_event(state)?; - self.normalize_dsc(state); - self.filter_event(state)?; - self.run_dynamic_sampling(state); - self.extract_transaction_metrics(state)?; - self.sample_envelope(state)?; - - if_processing!({ - self.store_process_event(state)?; - }); - } - - if_processing!({ - self.enforce_quotas(state)?; - // Any profile that reaches this point counts as "processed", regardless of whether - // they survive the actual `process_profiles` step. This is to be consistent with - // profiles that are dropped by dynamic sampling, which also count as "processed" - // even though they did not pass through the `process_profiles` step yet. 
- self.count_processed_profiles(state); - // We need the event parsed in order to set the profile context on it - self.process_profiles(state); - self.process_check_ins(state); - }); - - if state.has_event() { - self.scrub_event(state)?; - self.serialize_event(state)?; - } - - self.scrub_attachments(state); - - Ok(()) - } - - fn process( - &self, - message: ProcessEnvelope, - ) -> Result { - let mut state = self.prepare_state(message)?; - let project_id = state.project_id; - let client = state.envelope().meta().client().map(str::to_owned); - let user_agent = state.envelope().meta().user_agent().map(str::to_owned); - - relay_log::with_scope( - |scope| { - scope.set_tag("project", project_id); - if let Some(client) = client { - scope.set_tag("sdk", client); - } - if let Some(user_agent) = user_agent { - scope.set_extra("user_agent", user_agent.into()); - } - }, - || { - match self.process_state(&mut state) { - Ok(()) => { - // The envelope could be modified or even emptied during processing, which - // requires recomputation of the context. 
- state.managed_envelope.update(); - - let has_metrics = !state.extracted_metrics.project_metrics.is_empty(); - - state.extracted_metrics.send_metrics( - state.managed_envelope.envelope(), - self.project_cache.clone(), - ); - - let envelope_response = if state.managed_envelope.envelope().is_empty() { - if !has_metrics { - // Individual rate limits have already been issued - state.managed_envelope.reject(Outcome::RateLimited(None)); - } else { - state.managed_envelope.accept(); - } - None - } else { - Some(state.managed_envelope) - }; - - Ok(ProcessEnvelopeResponse { - envelope: envelope_response, - }) - } - Err(err) => { - if let Some(outcome) = err.to_outcome() { - state.managed_envelope.reject(outcome); - } - - if err.should_keep_metrics() { - state.extracted_metrics.send_metrics( - state.managed_envelope.envelope(), - self.project_cache.clone(), - ); - } - - Err(err) - } - } - }, - ) - } - - fn handle_process_envelope(&self, message: ProcessEnvelope) { - let project_key = message.envelope.envelope().meta().public_key(); - let wait_time = message.envelope.start_time().elapsed(); - metric!(timer(RelayTimers::EnvelopeWaitTime) = wait_time); - - let result = metric!(timer(RelayTimers::EnvelopeProcessingTime), { - self.process(message) - }); - - match result { - Ok(response) => { - if let Some(managed_envelope) = response.envelope { - self.envelope_manager.send(SubmitEnvelope { - envelope: managed_envelope, - }) - }; - } - Err(error) => { - // Errors are only logged for what we consider infrastructure or implementation - // bugs. In other cases, we "expect" errors and log them as debug level. 
- if error.is_unexpected() { - relay_log::error!( - tags.project_key = %project_key, - error = &error as &dyn Error, - "error processing envelope" - ); - } - } - } - } - - fn handle_process_metrics(&self, message: ProcessMetrics) { - let ProcessMetrics { - items, - project_key: public_key, - start_time, - sent_at, - } = message; - - let received = relay_common::instant_to_date_time(start_time); - let received_timestamp = UnixTimestamp::from_secs(received.timestamp() as u64); - - let clock_drift_processor = - ClockDriftProcessor::new(sent_at, received).at_least(MINIMUM_CLOCK_DRIFT); - - for item in items { - let payload = item.payload(); - if item.ty() == &ItemType::Metrics { - let mut timestamp = item.timestamp().unwrap_or(received_timestamp); - clock_drift_processor.process_timestamp(&mut timestamp); - - let metrics = - Metric::parse_all(&payload, timestamp).filter_map(|result| result.ok()); - - relay_log::trace!("inserting metrics into project cache"); - self.project_cache - .send(InsertMetrics::new(public_key, metrics)); - } else if item.ty() == &ItemType::MetricBuckets { - match Bucket::parse_all(&payload) { - Ok(mut buckets) => { - for bucket in &mut buckets { - clock_drift_processor.process_timestamp(&mut bucket.timestamp); - } - - relay_log::trace!("merging metric buckets into project cache"); - self.project_cache - .send(MergeBuckets::new(public_key, buckets)); - } - Err(error) => { - relay_log::debug!( - error = &error as &dyn Error, - "failed to parse metric bucket", - ); - metric!(counter(RelayCounters::MetricBucketsParsingFailed) += 1); - } - } - } else { - relay_log::error!( - "invalid item of type {} passed to ProcessMetrics", - item.ty() - ); - } - } - } - - /// Check and apply rate limits to metrics buckets. 
- #[cfg(feature = "processing")] - fn handle_rate_limit_flush_buckets(&self, message: RateLimitFlushBuckets) { - use relay_quotas::ItemScoping; - - let RateLimitFlushBuckets { - mut bucket_limiter, - partition_key, - } = message; - - let scoping = *bucket_limiter.scoping(); - - if let Some(rate_limiter) = self.rate_limiter.as_ref() { - let item_scoping = ItemScoping { - category: DataCategory::Transaction, - scoping: &scoping, - }; - - // We set over_accept_once such that the limit is actually reached, which allows subsequent - // calls with quantity=0 to be rate limited. - let over_accept_once = true; - let rate_limits = rate_limiter.is_rate_limited( - bucket_limiter.quotas(), - item_scoping, - bucket_limiter.transaction_count(), - over_accept_once, - ); - - let was_enforced = bucket_limiter.enforce_limits( - rate_limits.as_ref().map_err(|_| ()), - self.outcome_aggregator.clone(), - ); - - if was_enforced { - if let Ok(limits) = rate_limits { - // Update the rate limits in the project cache. - self.project_cache - .send(UpdateRateLimits::new(scoping.project_key, limits)); - } - } - } - - let buckets = bucket_limiter.into_metrics(); - if !buckets.is_empty() { - // Forward buckets to envelope manager to send them to upstream or kafka: - self.envelope_manager.send(SendMetrics { - buckets, - scoping, - partition_key, - }); - } - } - - fn encode_envelope_body( - body: Vec, - http_encoding: HttpEncoding, - ) -> Result, std::io::Error> { - let envelope_body = match http_encoding { - HttpEncoding::Identity => body, - HttpEncoding::Deflate => { - let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default()); - encoder.write_all(body.as_ref())?; - encoder.finish()? - } - HttpEncoding::Gzip => { - let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); - encoder.write_all(body.as_ref())?; - encoder.finish()? - } - HttpEncoding::Br => { - // Use default buffer size (via 0), medium quality (5), and the default lgwin (22). 
- let mut encoder = BrotliEncoder::new(Vec::new(), 0, 5, 22); - encoder.write_all(body.as_ref())?; - encoder.into_inner() - } - }; - Ok(envelope_body) - } - - fn handle_encode_envelope(&self, message: EncodeEnvelope) { - let mut request = message.request; - match Self::encode_envelope_body(request.envelope_body, request.http_encoding) { - Err(e) => { - request - .response_sender - .send(Err(SendEnvelopeError::BodyEncodingFailed(e))) - .ok(); - } - Ok(envelope_body) => { - request.envelope_body = envelope_body; - self.upstream_relay.send(SendRequest(request)); - } - } - } - - fn handle_message(&self, message: EnvelopeProcessor) { - match message { - EnvelopeProcessor::ProcessEnvelope(message) => self.handle_process_envelope(*message), - EnvelopeProcessor::ProcessMetrics(message) => self.handle_process_metrics(*message), - EnvelopeProcessor::EncodeEnvelope(message) => self.handle_encode_envelope(*message), - #[cfg(feature = "processing")] - EnvelopeProcessor::RateLimitFlushBuckets(message) => { - self.handle_rate_limit_flush_buckets(message); - } - } - } -} - -impl Service for EnvelopeProcessorService { - type Interface = EnvelopeProcessor; - - fn spawn_handler(self, mut rx: relay_system::Receiver) { - let thread_count = self.config.cpu_concurrency(); - relay_log::info!("starting {thread_count} envelope processing workers"); - - tokio::spawn(async move { - let service = Arc::new(self); - let semaphore = Arc::new(Semaphore::new(thread_count)); - - while let (Some(message), Ok(permit)) = - tokio::join!(rx.recv(), semaphore.clone().acquire_owned()) - { - let service = service.clone(); - tokio::task::spawn_blocking(move || { - service.handle_message(message); - drop(permit); - }); - } - }); - } -} - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use chrono::{DateTime, TimeZone, Utc}; - use similar_asserts::assert_eq; - - use relay_common::{DurationUnit, MetricUnit, Uuid}; - use relay_general::pii::{DataScrubbingConfig, PiiConfig}; - use 
relay_general::protocol::{EventId, TransactionSource}; - use relay_general::store::{ - LazyGlob, MeasurementsConfig, RedactionRule, RuleScope, TransactionNameRule, - }; - use relay_sampling::{ - RuleCondition, RuleId, RuleType, SamplingConfig, SamplingMode, SamplingRule, SamplingValue, - }; - use relay_test::mock_service; - - use crate::actors::test_store::TestStore; - use crate::extractors::RequestMeta; - use crate::metrics_extraction::transactions::types::{ - CommonTags, TransactionMeasurementTags, TransactionMetric, - }; - use crate::metrics_extraction::IntoMetric; - use crate::testutils::{new_envelope, state_with_rule_and_condition}; - use crate::utils::Semaphore as TestSemaphore; - - use super::*; - - struct TestProcessSessionArguments<'a> { - item: Item, - received: DateTime, - client: Option<&'a str>, - client_addr: Option, - metrics_config: SessionMetricsConfig, - clock_drift_processor: ClockDriftProcessor, - extracted_metrics: Vec, - } - - impl<'a> TestProcessSessionArguments<'a> { - fn run_session_producer(&mut self) -> bool { - let proc = create_test_processor(Default::default()); - proc.process_session( - &mut self.item, - self.received, - self.client, - self.client_addr, - self.metrics_config, - &self.clock_drift_processor, - &mut self.extracted_metrics, - ) - } - - fn default() -> Self { - let mut item = Item::new(ItemType::Event); - - let session = r#"{ - "init": false, - "started": "2021-04-26T08:00:00+0100", - "timestamp": "2021-04-26T08:00:00+0100", - "attrs": { - "release": "1.0.0" - }, - "did": "user123", - "status": "this is not a valid status!", - "duration": 123.4 - }"#; - - item.set_payload(ContentType::Json, session); - let received = DateTime::from_str("2021-04-26T08:00:00+0100").unwrap(); - - Self { - item, - received, - client: None, - client_addr: None, - metrics_config: serde_json::from_str( - " - { - \"version\": 0, - \"drop\": true - }", - ) - .unwrap(), - clock_drift_processor: ClockDriftProcessor::new(None, received), - 
extracted_metrics: vec![], - } - } - } - - /// Checks that the default test-arguments leads to the item being kept, which helps ensure the - /// other tests are valid. - #[tokio::test] - async fn test_process_session_keep_item() { - let mut args = TestProcessSessionArguments::default(); - assert!(args.run_session_producer()); - } - - #[tokio::test] - async fn test_process_session_invalid_json() { - let mut args = TestProcessSessionArguments::default(); - args.item - .set_payload(ContentType::Json, "this isnt valid json"); - assert!(!args.run_session_producer()); - } - - #[tokio::test] - async fn test_process_session_sequence_overflow() { - let mut args = TestProcessSessionArguments::default(); - args.item.set_payload( - ContentType::Json, - r#"{ - "init": false, - "started": "2021-04-26T08:00:00+0100", - "timestamp": "2021-04-26T08:00:00+0100", - "seq": 18446744073709551615, - "attrs": { - "release": "1.0.0" - }, - "did": "user123", - "status": "this is not a valid status!", - "duration": 123.4 - }"#, - ); - assert!(!args.run_session_producer()); - } - - #[tokio::test] - async fn test_process_session_invalid_timestamp() { - let mut args = TestProcessSessionArguments::default(); - args.received = DateTime::from_str("2021-05-26T08:00:00+0100").unwrap(); - assert!(!args.run_session_producer()); - } - - #[tokio::test] - async fn test_process_session_metrics_extracted() { - let mut args = TestProcessSessionArguments::default(); - args.item.set_metrics_extracted(true); - assert!(!args.run_session_producer()); - } - - fn create_breadcrumbs_item(breadcrumbs: &[(Option>, &str)]) -> Item { - let mut data = Vec::new(); - - for (date, message) in breadcrumbs { - let mut breadcrumb = BTreeMap::new(); - breadcrumb.insert("message", (*message).to_string()); - if let Some(date) = date { - breadcrumb.insert("timestamp", date.to_rfc3339()); - } - - rmp_serde::encode::write(&mut data, &breadcrumb).expect("write msgpack"); - } - - let mut item = Item::new(ItemType::Attachment); - 
item.set_payload(ContentType::MsgPack, data); - item - } - - fn breadcrumbs_from_event(event: &Annotated) -> &Vec> { - event - .value() - .unwrap() - .breadcrumbs - .value() - .unwrap() - .values - .value() - .unwrap() - } - - fn services() -> (Addr, Addr) { - let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); - let (test_store, _) = mock_service("test_store", (), |&mut (), _| {}); - (outcome_aggregator, test_store) - } - - #[tokio::test] - async fn test_it_keeps_or_drops_transactions() { - relay_test::setup(); - - let (outcome_aggregator, test_store) = services(); - - // an empty json still produces a valid config - let json_config = serde_json::json!({}); - - let config = Config::from_json_value(json_config).unwrap(); - - let service = create_test_processor(config); - - let event = Event { - id: Annotated::new(EventId::new()), - ty: Annotated::new(EventType::Transaction), - transaction: Annotated::new("testing".to_owned()), - ..Event::default() - }; - - for (sample_rate, expected_result) in [ - (0.0, SamplingResult::Drop(MatchedRuleIds(vec![RuleId(1)]))), - (1.0, SamplingResult::Keep), - ] { - let project_state = state_with_rule_and_condition( - Some(sample_rate), - RuleType::Transaction, - SamplingMode::Received, - RuleCondition::all(), - ); - - let mut state = ProcessEnvelopeState { - event: Annotated::from(event.clone()), - transaction_metrics_extracted: false, - metrics: Default::default(), - sample_rates: None, - sampling_result: SamplingResult::Keep, - extracted_metrics: Default::default(), - project_state: Arc::new(project_state), - sampling_project_state: None, - project_id: ProjectId::new(42), - managed_envelope: ManagedEnvelope::new( - new_envelope(false, "foo"), - TestSemaphore::new(42).try_acquire().unwrap(), - outcome_aggregator.clone(), - test_store.clone(), - ), - }; - - // TODO: This does not test if the sampling decision is actually applied. 
This should be - // refactored to send a proper Envelope in and call process_state to cover the full - // pipeline. - service.compute_sampling_decision(&mut state); - assert_eq!(state.sampling_result, expected_result); - } - } - - #[test] - fn test_breadcrumbs_file1() { - let item = create_breadcrumbs_item(&[(None, "item1")]); - - // NOTE: using (Some, None) here: - let result = EnvelopeProcessorService::event_from_attachments( - &Config::default(), - None, - Some(item), - None, - ); - - let event = result.unwrap().0; - let breadcrumbs = breadcrumbs_from_event(&event); - - assert_eq!(breadcrumbs.len(), 1); - let first_breadcrumb_message = breadcrumbs[0].value().unwrap().message.value().unwrap(); - assert_eq!("item1", first_breadcrumb_message); - } - - #[test] - fn test_breadcrumbs_file2() { - let item = create_breadcrumbs_item(&[(None, "item2")]); - - // NOTE: using (None, Some) here: - let result = EnvelopeProcessorService::event_from_attachments( - &Config::default(), - None, - None, - Some(item), - ); - - let event = result.unwrap().0; - let breadcrumbs = breadcrumbs_from_event(&event); - assert_eq!(breadcrumbs.len(), 1); - - let first_breadcrumb_message = breadcrumbs[0].value().unwrap().message.value().unwrap(); - assert_eq!("item2", first_breadcrumb_message); - } - - #[test] - fn test_breadcrumbs_truncation() { - let item1 = create_breadcrumbs_item(&[(None, "crumb1")]); - let item2 = create_breadcrumbs_item(&[(None, "crumb2"), (None, "crumb3")]); - - let result = EnvelopeProcessorService::event_from_attachments( - &Config::default(), - None, - Some(item1), - Some(item2), - ); - - let event = result.unwrap().0; - let breadcrumbs = breadcrumbs_from_event(&event); - assert_eq!(breadcrumbs.len(), 2); - } - - #[test] - fn test_breadcrumbs_order_with_none() { - let d1 = Utc.with_ymd_and_hms(2019, 10, 10, 12, 10, 10).unwrap(); - let d2 = Utc.with_ymd_and_hms(2019, 10, 11, 12, 10, 10).unwrap(); - - let item1 = create_breadcrumbs_item(&[(None, "none"), (Some(d1), 
"d1")]); - let item2 = create_breadcrumbs_item(&[(Some(d2), "d2")]); - - let result = EnvelopeProcessorService::event_from_attachments( - &Config::default(), - None, - Some(item1), - Some(item2), - ); - - let event = result.unwrap().0; - let breadcrumbs = breadcrumbs_from_event(&event); - assert_eq!(breadcrumbs.len(), 2); - - assert_eq!(Some("d1"), breadcrumbs[0].value().unwrap().message.as_str()); - assert_eq!(Some("d2"), breadcrumbs[1].value().unwrap().message.as_str()); - } - - #[test] - fn test_breadcrumbs_reversed_with_none() { - let d1 = Utc.with_ymd_and_hms(2019, 10, 10, 12, 10, 10).unwrap(); - let d2 = Utc.with_ymd_and_hms(2019, 10, 11, 12, 10, 10).unwrap(); - - let item1 = create_breadcrumbs_item(&[(Some(d2), "d2")]); - let item2 = create_breadcrumbs_item(&[(None, "none"), (Some(d1), "d1")]); - - let result = EnvelopeProcessorService::event_from_attachments( - &Config::default(), - None, - Some(item1), - Some(item2), - ); - - let event = result.unwrap().0; - let breadcrumbs = breadcrumbs_from_event(&event); - assert_eq!(breadcrumbs.len(), 2); - - assert_eq!(Some("d1"), breadcrumbs[0].value().unwrap().message.as_str()); - assert_eq!(Some("d2"), breadcrumbs[1].value().unwrap().message.as_str()); - } - - #[test] - fn test_empty_breadcrumbs_item() { - let item1 = create_breadcrumbs_item(&[]); - let item2 = create_breadcrumbs_item(&[]); - let item3 = create_breadcrumbs_item(&[]); - - let result = EnvelopeProcessorService::event_from_attachments( - &Config::default(), - Some(item1), - Some(item2), - Some(item3), - ); - - // regression test to ensure we don't fail parsing an empty file - result.expect("event_from_attachments"); - } - - fn create_test_processor(config: Config) -> EnvelopeProcessorService { - let (envelope_manager, _) = mock_service("envelope_manager", (), |&mut (), _| {}); - let (outcome_aggregator, _) = mock_service("outcome_aggregator", (), |&mut (), _| {}); - let (project_cache, _) = mock_service("project_cache", (), |&mut (), _| {}); - let 
(upstream_relay, _) = mock_service("upstream_relay", (), |&mut (), _| {}); - EnvelopeProcessorService { - config: Arc::new(config), - envelope_manager, - outcome_aggregator, - project_cache, - upstream_relay, - #[cfg(feature = "processing")] - rate_limiter: None, - #[cfg(feature = "processing")] - geoip_lookup: None, - } - } - - #[tokio::test] - async fn test_user_report_invalid() { - let processor = create_test_processor(Default::default()); - let (outcome_aggregator, test_store) = services(); - let event_id = protocol::EventId::new(); - - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - let request_meta = RequestMeta::new(dsn); - let mut envelope = Envelope::from_request(Some(event_id), request_meta); - - envelope.add_item({ - let mut item = Item::new(ItemType::UserReport); - item.set_payload(ContentType::Json, r###"{"foo": "bar"}"###); - item - }); - - envelope.add_item({ - let mut item = Item::new(ItemType::Event); - item.set_payload(ContentType::Json, "{}"); - item - }); - - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone(envelope, outcome_aggregator, test_store), - project_state: Arc::new(ProjectState::allowed()), - sampling_project_state: None, - }; - - let envelope_response = processor.process(message).unwrap(); - let ctx = envelope_response.envelope.unwrap(); - let new_envelope = ctx.envelope(); - - assert_eq!(new_envelope.len(), 1); - assert_eq!(new_envelope.items().next().unwrap().ty(), &ItemType::Event); - } - - #[tokio::test] - #[cfg(feature = "processing")] - async fn test_replays_combined_payload() { - let processor = create_test_processor(Default::default()); - let (outcome_aggregator, test_store) = services(); - let event_id = protocol::EventId::new(); - - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - let request_meta = RequestMeta::new(dsn); - let mut envelope = Envelope::from_request(Some(event_id), request_meta); - - 
envelope.add_item({ - let mut item = Item::new(ItemType::ReplayRecording); - item.set_payload(ContentType::OctetStream, r###"{"foo": "bar"}"###); - item - }); - - envelope.add_item({ - let mut item = Item::new(ItemType::ReplayEvent); - item.set_payload(ContentType::Json, r#"{ - "type": "replay_event", - "replay_id": "d2132d31b39445f1938d7e21b6bf0ec4", - "replay_type": "session", - "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", - "segment_id": 0, - "timestamp": 1597977777.6189718, - "replay_start_timestamp": 1597976392.6542819, - "urls": ["sentry.io"], - "error_ids": ["1", "2"], - "trace_ids": ["3", "4"], - "dist": "1.12", - "platform": "javascript", - "environment": "production", - "release": 42, - "tags": { - "transaction": "/organizations/:orgId/performance/:eventSlug/" - }, - "sdk": { - "name": "name", - "version": "veresion" - }, - "user": { - "id": "123", - "username": "user", - "email": "user@site.com", - "ip_address": "192.168.11.12" - }, - "request": { - "url": null, - "headers": { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" - } - }, - "contexts": { - "trace": { - "trace_id": "4C79F60C11214EB38604F4AE0781BFB2", - "span_id": "FA90FDEAD5F74052", - "type": "trace" - }, - "replay": { - "error_sample_rate": 0.125, - "session_sample_rate": 0.5 - } - } - }"#); - item - }); - - let mut project_state = ProjectState::allowed(); - project_state.config.features.insert(Feature::SessionReplay); - project_state - .config - .features - .insert(Feature::SessionReplayCombinedEnvelopeItems); - - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone(envelope, outcome_aggregator, test_store), - project_state: Arc::new(project_state), - sampling_project_state: None, - }; - - let envelope_response = processor.process(message).unwrap(); - let ctx = envelope_response.envelope.unwrap(); - let new_envelope = ctx.envelope(); - - assert_eq!(new_envelope.len(), 1); - - 
assert_eq!( - new_envelope.items().next().unwrap().ty(), - &ItemType::CombinedReplayEventAndRecording - ); - } - - fn process_envelope_with_root_project_state( - envelope: Box, - sampling_project_state: Option>, - ) -> Envelope { - let processor = create_test_processor(Default::default()); - let (outcome_aggregator, test_store) = services(); - - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone(envelope, outcome_aggregator, test_store), - project_state: Arc::new(ProjectState::allowed()), - sampling_project_state, - }; - - let envelope_response = processor.process(message).unwrap(); - let ctx = envelope_response.envelope.unwrap(); - ctx.envelope().clone() - } - - fn extract_first_event_from_envelope(envelope: Envelope) -> Event { - let item = envelope.items().next().unwrap(); - let annotated_event: Annotated = - Annotated::from_json_bytes(&item.payload()).unwrap(); - annotated_event.into_value().unwrap() - } - - fn mocked_error_item() -> Item { - let mut item = Item::new(ItemType::Event); - item.set_payload( - ContentType::Json, - r#"{ - "event_id": "52df9022835246eeb317dbd739ccd059", - "exception": { - "values": [ - { - "type": "mytype", - "value": "myvalue", - "module": "mymodule", - "thread_id": 42, - "other": "value" - } - ] - } - }"#, - ); - item - } - - fn project_state_with_single_rule(sample_rate: f64) -> ProjectState { - let sampling_config = SamplingConfig { - rules: vec![], - rules_v2: vec![SamplingRule { - condition: RuleCondition::all(), - sampling_value: SamplingValue::SampleRate { value: sample_rate }, - ty: RuleType::Trace, - id: RuleId(1), - time_range: Default::default(), - decaying_fn: Default::default(), - }], - mode: SamplingMode::Received, - }; - let mut sampling_project_state = ProjectState::allowed(); - sampling_project_state.config.dynamic_sampling = Some(sampling_config); - sampling_project_state - } - - #[tokio::test] - async fn test_error_is_tagged_correctly() { - let event_id = EventId::new(); - let dsn = 
"https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - let request_meta = RequestMeta::new(dsn); - - let mut envelope = Envelope::from_request(Some(event_id), request_meta.clone()); - let dsc = DynamicSamplingContext { - trace_id: Uuid::new_v4(), - public_key: ProjectKey::parse("abd0f232775f45feab79864e580d160b").unwrap(), - release: Some("1.1.1".to_string()), - user: Default::default(), - replay_id: None, - environment: None, - transaction: Some("transaction1".into()), - sample_rate: None, - other: BTreeMap::new(), - }; - envelope.set_dsc(dsc); - envelope.add_item(mocked_error_item()); - - // We test the tagging when the incoming dsc matches a 100% rule. - let sampling_project_state = project_state_with_single_rule(1.0); - let new_envelope = process_envelope_with_root_project_state( - envelope.clone(), - Some(Arc::new(sampling_project_state)), - ); - let event = extract_first_event_from_envelope(new_envelope); - let trace_context = event - .contexts - .value() - .unwrap() - .get_context(TraceContext::default_key()) - .unwrap(); - - assert!(matches!(trace_context, Trace(..))); - if let Trace(context) = trace_context { - assert!(context.sampled.value().unwrap()) - } - - // We test the tagging when the incoming dsc matches a 0% rule. - let sampling_project_state = project_state_with_single_rule(0.0); - let new_envelope = process_envelope_with_root_project_state( - envelope, - Some(Arc::new(sampling_project_state)), - ); - let event = extract_first_event_from_envelope(new_envelope); - let trace_context = event - .contexts - .value() - .unwrap() - .get_context(TraceContext::default_key()) - .unwrap(); - - assert!(matches!(trace_context, Trace(..))); - if let Trace(context) = trace_context { - assert!(!context.sampled.value().unwrap()) - } - - // We test the tagging is not performed when an event is already tagged. 
- let mut envelope = Envelope::from_request(Some(event_id), request_meta.clone()); - let mut item = Item::new(ItemType::Event); - item.set_payload( - ContentType::Json, - r#"{ - "event_id": "52df9022835246eeb317dbd739ccd059", - "exception": { - "values": [ - { - "type": "mytype", - "value": "myvalue", - "module": "mymodule", - "thread_id": 42, - "other": "value" - } - ] - }, - "contexts": { - "trace": { - "sampled": true - } - } - }"#, - ); - envelope.add_item(item); - // We want the sampling result to be Drop, so that we can show how sampled is still kept to - // to true. - let sampling_project_state = project_state_with_single_rule(0.0); - let new_envelope = process_envelope_with_root_project_state( - envelope, - Some(Arc::new(sampling_project_state)), - ); - let event = extract_first_event_from_envelope(new_envelope); - let trace_context = event - .contexts - .value() - .unwrap() - .get_context(TraceContext::default_key()) - .unwrap(); - - assert!(matches!(trace_context, Trace(..))); - if let Trace(context) = trace_context { - assert!(context.sampled.value().unwrap()) - } - - // We test the tagging when root project state and dsc are none. 
- let mut envelope = Envelope::from_request(Some(event_id), request_meta); - envelope.add_item(mocked_error_item()); - let new_envelope = process_envelope_with_root_project_state(envelope, None); - let event = extract_first_event_from_envelope(new_envelope); - - assert!(event.contexts.value().is_none()); - } - - #[tokio::test] - async fn test_browser_version_extraction_with_pii_like_data() { - let processor = create_test_processor(Default::default()); - let (outcome_aggregator, test_store) = services(); - let event_id = protocol::EventId::new(); - - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - let request_meta = RequestMeta::new(dsn); - let mut envelope = Envelope::from_request(Some(event_id), request_meta); - - envelope.add_item({ - let mut item = Item::new(ItemType::Event); - item.set_payload( - ContentType::Json, - r###" - { - "request": { - "headers": [ - ["User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"] - ] - } - } - "###, - ); - item - }); - - let mut datascrubbing_settings = DataScrubbingConfig::default(); - // enable all the default scrubbing - datascrubbing_settings.scrub_data = true; - datascrubbing_settings.scrub_defaults = true; - datascrubbing_settings.scrub_ip_addresses = true; - - // Make sure to mask any IP-like looking data - let pii_config = PiiConfig::from_json( - r##" - { - "applications": { - "**": ["@ip:mask"] - } - } - "##, - ) - .unwrap(); - - let config = ProjectConfig { - datascrubbing_settings, - pii_config: Some(pii_config), - ..Default::default() - }; - - let mut project_state = ProjectState::allowed(); - project_state.config = config; - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone(envelope, outcome_aggregator, test_store), - project_state: Arc::new(project_state), - sampling_project_state: None, - }; - - let envelope_response = processor.process(message).unwrap(); - let 
new_envelope = envelope_response.envelope.unwrap(); - let new_envelope = new_envelope.envelope(); - - let event_item = new_envelope.items().last().unwrap(); - let annotated_event: Annotated = - Annotated::from_json_bytes(&event_item.payload()).unwrap(); - let event = annotated_event.into_value().unwrap(); - let headers = event - .request - .into_value() - .unwrap() - .headers - .into_value() - .unwrap(); - - // IP-like data must be masked - assert_eq!(Some("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/********* Safari/537.36"), headers.get_header("User-Agent")); - // But we still get correct browser and version number - let contexts = event.contexts.into_value().unwrap(); - let browser = contexts.get("browser").unwrap(); - assert_eq!( - r#"{"name":"Chrome","version":"103.0.0","type":"browser"}"#, - browser.to_json().unwrap() - ); - } - - #[tokio::test] - async fn test_client_report_removal() { - relay_test::setup(); - let (outcome_aggregator, test_store) = services(); - - let config = Config::from_json_value(serde_json::json!({ - "outcomes": { - "emit_outcomes": true, - "emit_client_outcomes": true - } - })) - .unwrap(); - - let processor = create_test_processor(config); - - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - let request_meta = RequestMeta::new(dsn); - let mut envelope = Envelope::from_request(None, request_meta); - - envelope.add_item({ - let mut item = Item::new(ItemType::ClientReport); - item.set_payload( - ContentType::Json, - r###" - { - "discarded_events": [ - ["queue_full", "error", 42] - ] - } - "###, - ); - item - }); - - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone(envelope, outcome_aggregator, test_store), - project_state: Arc::new(ProjectState::allowed()), - sampling_project_state: None, - }; - - let envelope_response = processor.process(message).unwrap(); - assert!(envelope_response.envelope.is_none()); - } - - 
#[tokio::test] - async fn test_client_report_forwarding() { - relay_test::setup(); - let (outcome_aggregator, test_store) = services(); - - let config = Config::from_json_value(serde_json::json!({ - "outcomes": { - "emit_outcomes": false, - // a relay need to emit outcomes at all to not process. - "emit_client_outcomes": true - } - })) - .unwrap(); - - let processor = create_test_processor(config); - - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - let request_meta = RequestMeta::new(dsn); - let mut envelope = Envelope::from_request(None, request_meta); - - envelope.add_item({ - let mut item = Item::new(ItemType::ClientReport); - item.set_payload( - ContentType::Json, - r###" - { - "discarded_events": [ - ["queue_full", "error", 42] - ] - } - "###, - ); - item - }); - - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone(envelope, outcome_aggregator, test_store), - project_state: Arc::new(ProjectState::allowed()), - sampling_project_state: None, - }; - - let envelope_response = processor.process(message).unwrap(); - let ctx = envelope_response.envelope.unwrap(); - let item = ctx.envelope().items().next().unwrap(); - assert_eq!(item.ty(), &ItemType::ClientReport); - - ctx.accept(); // do not try to capture or emit outcomes - } - - #[tokio::test] - #[cfg(feature = "processing")] - async fn test_client_report_removal_in_processing() { - relay_test::setup(); - let (outcome_aggregator, test_store) = services(); - - let config = Config::from_json_value(serde_json::json!({ - "outcomes": { - "emit_outcomes": true, - "emit_client_outcomes": false, - }, - "processing": { - "enabled": true, - "kafka_config": [], - } - })) - .unwrap(); - - let processor = create_test_processor(config); - - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - let request_meta = RequestMeta::new(dsn); - let mut envelope = Envelope::from_request(None, request_meta); - - 
envelope.add_item({ - let mut item = Item::new(ItemType::ClientReport); - item.set_payload( - ContentType::Json, - r###" - { - "discarded_events": [ - ["queue_full", "error", 42] - ] - } - "###, - ); - item - }); - - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone(envelope, outcome_aggregator, test_store), - project_state: Arc::new(ProjectState::allowed()), - sampling_project_state: None, - }; - - let envelope_response = processor.process(message).unwrap(); - assert!(envelope_response.envelope.is_none()); - } - - #[test] - #[cfg(feature = "processing")] - fn test_unprintable_fields() { - let event = Annotated::new(Event { - environment: Annotated::new(String::from( - "�9�~YY���)�����9�~YY���)�����9�~YY���)�����9�~YY���)�����", - )), - ..Default::default() - }); - assert!(has_unprintable_fields(&event)); - - let event = Annotated::new(Event { - release: Annotated::new( - String::from("���7��#1G����7��#1G����7��#1G����7��#1G����7��#").into(), - ), - ..Default::default() - }); - assert!(has_unprintable_fields(&event)); - - let event = Annotated::new(Event { - environment: Annotated::new(String::from("production")), - ..Default::default() - }); - assert!(!has_unprintable_fields(&event)); - - let event = Annotated::new(Event { - release: Annotated::new( - String::from("release with\t some\n normal\r\nwhitespace").into(), - ), - ..Default::default() - }); - assert!(!has_unprintable_fields(&event)); - } - - #[test] - fn test_from_outcome_type_sampled() { - assert!(matches!( - outcome_from_parts(ClientReportField::FilteredSampling, "adsf"), - Err(_) - )); - - assert!(matches!( - outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:"), - Err(_) - )); - - assert!(matches!( - outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:foo"), - Err(_) - )); - - assert!(matches!( - outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:"), - Err(()) - )); - - assert!(matches!( - 
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:;"), - Err(()) - )); - - assert!(matches!( - outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:ab;12"), - Err(()) - )); - - assert_eq!( - outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123,456"), - Ok(Outcome::FilteredSampling(MatchedRuleIds(vec![ - RuleId(123), - RuleId(456), - ]))) - ); - - assert_eq!( - outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123"), - Ok(Outcome::FilteredSampling(MatchedRuleIds(vec![RuleId(123)]))) - ); - } - - #[test] - fn test_from_outcome_type_filtered() { - assert!(matches!( - outcome_from_parts(ClientReportField::Filtered, "error-message"), - Ok(Outcome::Filtered(FilterStatKey::ErrorMessage)) - )); - assert!(matches!( - outcome_from_parts(ClientReportField::Filtered, "adsf"), - Err(_) - )); - } - - #[test] - fn test_from_outcome_type_client_discard() { - assert_eq!( - outcome_from_parts(ClientReportField::ClientDiscard, "foo_reason").unwrap(), - Outcome::ClientDiscard("foo_reason".into()) - ); - } - - #[test] - fn test_from_outcome_type_rate_limited() { - assert!(matches!( - outcome_from_parts(ClientReportField::RateLimited, ""), - Ok(Outcome::RateLimited(None)) - )); - assert_eq!( - outcome_from_parts(ClientReportField::RateLimited, "foo_reason").unwrap(), - Outcome::RateLimited(Some(ReasonCode::new("foo_reason"))) - ); - } - - fn capture_test_event(transaction_name: &str, source: TransactionSource) -> Vec { - let mut event = Annotated::::from_json( - r###" - { - "type": "transaction", - "transaction": "/foo/", - "timestamp": 946684810.0, - "start_timestamp": 946684800.0, - "contexts": { - "trace": { - "trace_id": "4c79f60c11214eb38604f4ae0781bfb2", - "span_id": "fa90fdead5f74053", - "op": "http.server", - "type": "trace" - } - }, - "transaction_info": { - "source": "url" - } - } - "###, - ) - .unwrap(); - let e = event.value_mut().as_mut().unwrap(); - e.transaction.set_value(Some(transaction_name.into())); - - 
e.transaction_info - .value_mut() - .as_mut() - .unwrap() - .source - .set_value(Some(source)); - - relay_statsd::with_capturing_test_client(|| { - log_transaction_name_metrics(&mut event, |event| { - let config = LightNormalizationConfig { - transaction_name_config: TransactionNameConfig { - rules: &[TransactionNameRule { - pattern: LazyGlob::new("/foo/*/**".to_owned()), - expiry: DateTime::::MAX_UTC, - scope: RuleScope::default(), - redaction: RedactionRule::Replace { - substitution: "*".to_owned(), - }, - }], - ready: false, - }, - ..Default::default() - }; - relay_general::store::light_normalize_event(event, config) - }) - .unwrap(); - }) - } - - #[test] - fn test_log_transaction_metrics_none() { - let captures = capture_test_event("/nothing", TransactionSource::Url); - insta::assert_debug_snapshot!(captures, @r###" - [ - "event.transaction_name_changes:1|c|#source_in:url,changes:none,source_out:sanitized,is_404:false", - ] - "###); - } - - #[test] - fn test_log_transaction_metrics_rule() { - let captures = capture_test_event("/foo/john/denver", TransactionSource::Url); - insta::assert_debug_snapshot!(captures, @r###" - [ - "event.transaction_name_changes:1|c|#source_in:url,changes:rule,source_out:sanitized,is_404:false", - ] - "###); - } - - #[test] - fn test_log_transaction_metrics_pattern() { - let captures = capture_test_event("/something/12345", TransactionSource::Url); - insta::assert_debug_snapshot!(captures, @r###" - [ - "event.transaction_name_changes:1|c|#source_in:url,changes:pattern,source_out:sanitized,is_404:false", - ] - "###); - } - - #[test] - fn test_log_transaction_metrics_both() { - let captures = capture_test_event("/foo/john/12345", TransactionSource::Url); - insta::assert_debug_snapshot!(captures, @r###" - [ - "event.transaction_name_changes:1|c|#source_in:url,changes:both,source_out:sanitized,is_404:false", - ] - "###); - } - - #[test] - fn test_log_transaction_metrics_no_match() { - let captures = capture_test_event("/foo/john/12345", 
TransactionSource::Route); - insta::assert_debug_snapshot!(captures, @r###" - [ - "event.transaction_name_changes:1|c|#source_in:route,changes:none,source_out:route,is_404:false", - ] - "###); - } - - /// This is a stand-in test to assert panicking behavior for spawn_blocking. - /// - /// [`EnvelopeProcessorService`] relies on tokio to restart the worker threads for blocking - /// tasks if there is a panic during processing. Tokio does not explicitly mention this behavior - /// in documentation, though the `spawn_blocking` contract suggests that this is intentional. - /// - /// This test should be moved if the worker pool is extracted into a utility. - #[test] - fn test_processor_panics() { - let future = async { - let semaphore = Arc::new(Semaphore::new(1)); - - // loop multiple times to prove that the runtime creates new threads - for _ in 0..3 { - // the previous permit should have been released during panic unwind - let permit = semaphore.clone().acquire_owned().await.unwrap(); - - let handle = tokio::task::spawn_blocking(move || { - let _permit = permit; // drop(permit) after panic!() would warn as "unreachable" - panic!("ignored"); - }); - - assert!(handle.await.is_err()); - } - }; - - tokio::runtime::Builder::new_current_thread() - .max_blocking_threads(1) - .build() - .unwrap() - .block_on(future); - } - - /// Confirms that the hardcoded value we use for the fixed length of the measurement MRI is - /// correct. Unit test is placed here because it has dependencies to relay-server and therefore - /// cannot be called from relay-general. - #[test] - fn test_mri_overhead_constant() { - let hardcoded_value = MeasurementsConfig::MEASUREMENT_MRI_OVERHEAD; - - let derived_value = { - let name = "foobar".to_string(); - let value = 5.0; // Arbitrary value. 
- let unit = MetricUnit::Duration(DurationUnit::default()); - let tags = TransactionMeasurementTags { - measurement_rating: None, - universal_tags: CommonTags(BTreeMap::new()), - }; - - let measurement = TransactionMetric::Measurement { - name: name.clone(), - value, - unit, - tags, - }; - - let metric: Metric = measurement.into_metric(UnixTimestamp::now()); - - metric.name.len() - unit.to_string().len() - name.len() - }; - assert_eq!( - hardcoded_value, derived_value, - "Update `MEASUREMENT_MRI_OVERHEAD` if the naming scheme changed." - ); - } -} diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 8831f4fba6..8d2b359933 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -1,5 +1,6 @@ //! Replay related processor code. +use std::collections::BTreeMap; use std::error::Error; use std::net::IpAddr; @@ -15,7 +16,7 @@ use relay_protocol::Annotated; use relay_replays::recording::RecordingScrubber; use relay_statsd::metric; -use crate::envelope::{ContentType, ItemType}; +use crate::envelope::{ContentType, Item, ItemType}; use crate::services::outcome::{DiscardReason, Outcome}; use crate::services::processor::{ProcessEnvelopeState, ProcessingError}; use crate::statsd::RelayTimers; @@ -113,6 +114,8 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), _ => ItemAction::Keep, }); + process_replays_combine_items(state)?; + Ok(()) } @@ -151,3 +154,169 @@ fn process_replay_event( Ok(replay) } + +fn process_replays_combine_items(state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { + let project_state = &state.project_state; + let combined_envelope_items = + project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); + + if combined_envelope_items { + // If this flag is enabled, combine both items into a single item, + // and remove the original items. 
+ // The combined Item's payload is a MsgPack map with the keys + // "replay_event" and "replay_recording". + // The values are the original payloads of the items. + let envelope = &mut state.envelope_mut(); + if let Some(replay_event_item) = + envelope.take_item_by(|item| item.ty() == &ItemType::ReplayEvent) + { + if let Some(replay_recording_item) = + envelope.take_item_by(|item| item.ty() == &ItemType::ReplayRecording) + { + let mut data = Vec::new(); + let mut combined_item_payload = BTreeMap::new(); + + combined_item_payload.insert("replay_event", replay_event_item.payload().to_vec()); + combined_item_payload + .insert("replay_recording", replay_recording_item.payload().to_vec()); + + if let Err(e) = rmp_serde::encode::write(&mut data, &combined_item_payload) { + relay_log::error!( + "failed to serialize combined replay event and recording: {}", + e + ); + // TODO: what to do here? Drop + emit outcome? + } + + let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); + + combined_item.set_payload(ContentType::MsgPack, data); + envelope.add_item(combined_item); + } else { + envelope.add_item(replay_event_item) + } + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use crate::envelope::{ContentType, Envelope, Item, ItemType}; + use crate::extractors::RequestMeta; + use crate::services::processor::ProcessEnvelope; + use crate::services::project::ProjectState; + use crate::services::{outcome_aggregator, test_store}; + use crate::testutils::create_test_processor; + use crate::utils::ManagedEnvelope; + use relay_dynamic_config::Feature; + use relay_event_schema::protocol::EventId; + use relay_sampling::evaluation::ReservoirCounters; + use relay_system::Addr; + use std::sync::Arc; + + #[tokio::test] + #[cfg(feature = "processing")] + async fn test_replays_combined_payload() { + let processor = create_test_processor(Default::default()); + let event_id = EventId::new(); + + let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" + 
.parse() + .unwrap(); + + let request_meta = RequestMeta::new(dsn); + let mut envelope = Envelope::from_request(Some(event_id), request_meta); + + envelope.add_item({ + let mut item = Item::new(ItemType::ReplayRecording); + item.set_payload(ContentType::OctetStream, r###"{"foo": "bar"}"###); + item + }); + + envelope.add_item({ + let mut item = Item::new(ItemType::ReplayEvent); + item.set_payload(ContentType::Json, r#"{ + "type": "replay_event", + "replay_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "replay_type": "session", + "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "segment_id": 0, + "timestamp": 1597977777.6189718, + "replay_start_timestamp": 1597976392.6542819, + "urls": ["sentry.io"], + "error_ids": ["1", "2"], + "trace_ids": ["3", "4"], + "dist": "1.12", + "platform": "javascript", + "environment": "production", + "release": 42, + "tags": { + "transaction": "/organizations/:orgId/performance/:eventSlug/" + }, + "sdk": { + "name": "name", + "version": "veresion" + }, + "user": { + "id": "123", + "username": "user", + "email": "user@site.com", + "ip_address": "192.168.11.12" + }, + "request": { + "url": null, + "headers": { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" + } + }, + "contexts": { + "trace": { + "trace_id": "4C79F60C11214EB38604F4AE0781BFB2", + "span_id": "FA90FDEAD5F74052", + "type": "trace" + }, + "replay": { + "error_sample_rate": 0.125, + "session_sample_rate": 0.5 + } + } + }"#); + item + }); + + let mut project_state = ProjectState::allowed(); + project_state + .config + .features + .0 + .insert(Feature::SessionReplay); + project_state + .config + .features + .0 + .insert(Feature::SessionReplayCombinedEnvelopeItems); + + let mut envelopes = crate::services::processor::ProcessingGroup::split_envelope(*envelope); + + let (group, envelope) = envelopes.pop().unwrap(); + + let message = ProcessEnvelope { + envelope: 
ManagedEnvelope::standalone(envelope, Addr::dummy(), Addr::dummy(), group), + project_state: Arc::new(project_state), + sampling_project_state: None, + reservoir_counters: ReservoirCounters::default(), + }; + + let envelope_response = processor.process(message).unwrap(); + let ctx = envelope_response.envelope.unwrap(); + let new_envelope = ctx.envelope(); + + assert_eq!(new_envelope.len(), 1); + + assert_eq!( + new_envelope.items().next().unwrap().ty(), + &ItemType::CombinedReplayEventAndRecording + ); + } +} From da62bb81ea8adf34e8b7646220e60d15176d90da Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 31 Jan 2024 12:33:25 -0800 Subject: [PATCH 09/72] get tests running --- relay-dynamic-config/src/feature.rs | 1 + relay-server/src/services/processor/replay.rs | 15 ++++++++------- tests/integration/test_replay_combined_payload.py | 5 +++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/relay-dynamic-config/src/feature.rs b/relay-dynamic-config/src/feature.rs index e768ed5cba..12d30ffb97 100644 --- a/relay-dynamic-config/src/feature.rs +++ b/relay-dynamic-config/src/feature.rs @@ -13,6 +13,7 @@ pub enum Feature { SessionReplayRecordingScrubbing, /// Enables combining session replay envelope item (Replay Recordings and Replay Events). /// into one item. + #[serde(rename = "organizations:session-replay-combined-envelope-items")] SessionReplayCombinedEnvelopeItems, /// Enables new User Feedback ingest. 
/// diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 8d2b359933..378063eca7 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -197,6 +197,7 @@ fn process_replays_combine_items(state: &mut ProcessEnvelopeState) -> Result<(), } } } + Ok(()) } @@ -205,6 +206,7 @@ mod tests { use crate::envelope::{ContentType, Envelope, Item, ItemType}; use crate::extractors::RequestMeta; use crate::services::processor::ProcessEnvelope; + use crate::services::processor::ProcessingGroup; use crate::services::project::ProjectState; use crate::services::{outcome_aggregator, test_store}; use crate::testutils::create_test_processor; @@ -245,8 +247,6 @@ mod tests { "timestamp": 1597977777.6189718, "replay_start_timestamp": 1597976392.6542819, "urls": ["sentry.io"], - "error_ids": ["1", "2"], - "trace_ids": ["3", "4"], "dist": "1.12", "platform": "javascript", "environment": "production", @@ -297,12 +297,13 @@ mod tests { .0 .insert(Feature::SessionReplayCombinedEnvelopeItems); - let mut envelopes = crate::services::processor::ProcessingGroup::split_envelope(*envelope); - - let (group, envelope) = envelopes.pop().unwrap(); - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone(envelope, Addr::dummy(), Addr::dummy(), group), + envelope: ManagedEnvelope::standalone( + envelope, + Addr::dummy(), + Addr::dummy(), + ProcessingGroup::Replay, + ), project_state: Arc::new(project_state), sampling_project_state: None, reservoir_counters: ReservoirCounters::default(), diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index b9eba64b10..e66b7231f3 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -42,10 +42,11 @@ def test_replay_combined_with_processing( payload = msgpack.unpackb(combined_replay_message["payload"]) - 
replay_event = json.loads(payload["replay_event"]) + replay_event = json.loads(bytes(payload["replay_event"])) + assert replay_event["replay_id"] == "515539018c9b4260a6f999572f1661ee" - assert payload["replay_recording"] == replay_recording_bytes + assert bytes(payload["replay_recording"]) == replay_recording_bytes # TODO: figure out behavior for this test From 8dbe24c014104939710d05811d7b1bd7806dbd80 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 31 Jan 2024 12:37:38 -0800 Subject: [PATCH 10/72] remove unused imports --- relay-server/src/services/processor/replay.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 378063eca7..ce26f96c9f 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -208,7 +208,6 @@ mod tests { use crate::services::processor::ProcessEnvelope; use crate::services::processor::ProcessingGroup; use crate::services::project::ProjectState; - use crate::services::{outcome_aggregator, test_store}; use crate::testutils::create_test_processor; use crate::utils::ManagedEnvelope; use relay_dynamic_config::Feature; From e29cabf215f5ace2dd3eb11c1f3bd2a7d7d5f80f Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 31 Jan 2024 12:46:09 -0800 Subject: [PATCH 11/72] make clippy even more happy --- relay-server/src/services/processor/replay.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index ce26f96c9f..d1de7bacb0 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -217,7 +217,6 @@ mod tests { use std::sync::Arc; #[tokio::test] - #[cfg(feature = "processing")] async fn test_replays_combined_payload() { let processor = create_test_processor(Default::default()); let event_id = EventId::new(); From 
3b96c663348aa7a758384dc414461041fa509324 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Wed, 31 Jan 2024 15:19:19 -0800 Subject: [PATCH 12/72] add changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00216565c6..26d5f2c43b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Emit a usage metric for total spans. ([#3007](https://github.com/getsentry/relay/pull/3007)) - Drop spans ending outside the valid timestamp range. ([#3013](https://github.com/getsentry/relay/pull/3013)) +- Adds support for combining replay envelope items ([#3035](https://github.com/getsentry/relay/pull/3035)) ## 24.1.1 From 6f7433e27b3baef03934613f078b3119ca97046a Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Mon, 5 Feb 2024 17:21:02 -0800 Subject: [PATCH 13/72] move logic to store --- relay-server/src/envelope.rs | 12 +- relay-server/src/services/processor.rs | 7 +- relay-server/src/services/processor/event.rs | 1 - relay-server/src/services/processor/replay.rs | 170 +----------------- relay-server/src/services/store.rs | 162 ++++++++++++----- relay-server/src/utils/rate_limits.rs | 1 - relay-server/src/utils/sizes.rs | 1 - .../test_replay_combined_payload.py | 50 +----- 8 files changed, 127 insertions(+), 277 deletions(-) diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 547fe4d1b8..31f26e219f 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -113,8 +113,6 @@ pub enum ItemType { ReplayEvent, /// Replay Recording data. ReplayRecording, - /// Combined Replay metadata and Recording Payload - CombinedReplayEventAndRecording, /// Monitor check-in encoded as JSON. CheckIn, /// A standalone span. 
@@ -168,9 +166,6 @@ impl fmt::Display for ItemType { Self::Profile => write!(f, "profile"), Self::ReplayEvent => write!(f, "replay_event"), Self::ReplayRecording => write!(f, "replay_recording"), - Self::CombinedReplayEventAndRecording => { - write!(f, "combined_replay_event_and_recording") - } Self::CheckIn => write!(f, "check_in"), Self::Span => write!(f, "span"), Self::OtelSpan => write!(f, "otel_span"), @@ -636,10 +631,7 @@ impl Item { } else { DataCategory::Profile }), - ItemType::ReplayEvent - | ItemType::ReplayRecording - | ItemType::CombinedReplayEventAndRecording => Some(DataCategory::Replay), - + ItemType::ReplayEvent | ItemType::ReplayRecording => Some(DataCategory::Replay), ItemType::ClientReport => None, ItemType::CheckIn => Some(DataCategory::Monitor), ItemType::Span | ItemType::OtelSpan => Some(if indexed { @@ -840,7 +832,6 @@ impl Item { | ItemType::ClientReport | ItemType::ReplayEvent | ItemType::ReplayRecording - | ItemType::CombinedReplayEventAndRecording | ItemType::Profile | ItemType::CheckIn | ItemType::Span @@ -875,7 +866,6 @@ impl Item { ItemType::MetricMeta => false, ItemType::ClientReport => false, ItemType::ReplayRecording => false, - ItemType::CombinedReplayEventAndRecording => false, ItemType::Profile => true, ItemType::CheckIn => false, ItemType::Span => false, diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 240d6719ac..bcfa4ad28d 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1249,9 +1249,10 @@ impl EnvelopeProcessorService { /// Processes replays. 
fn process_replays(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { replay::process(state, &self.inner.config)?; - if_processing!(self.inner.config, { - self.enforce_quotas(state)?; - }); + // if_processing!(self.inner.config, { + // // replay::process_replays_combine_items(state)?; + // self.enforce_quotas(state)?; + // }); Ok(()) } diff --git a/relay-server/src/services/processor/event.rs b/relay-server/src/services/processor/event.rs index 6bd4e8d419..9a030ca680 100644 --- a/relay-server/src/services/processor/event.rs +++ b/relay-server/src/services/processor/event.rs @@ -457,7 +457,6 @@ fn is_duplicate(item: &Item, processing_enabled: bool) -> bool { ItemType::CheckIn => false, ItemType::Span => false, ItemType::OtelSpan => false, - ItemType::CombinedReplayEventAndRecording => false, // Without knowing more, `Unknown` items are allowed to be repeated ItemType::Unknown(_) => false, diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index d1de7bacb0..8831f4fba6 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -1,6 +1,5 @@ //! Replay related processor code. 
-use std::collections::BTreeMap; use std::error::Error; use std::net::IpAddr; @@ -16,7 +15,7 @@ use relay_protocol::Annotated; use relay_replays::recording::RecordingScrubber; use relay_statsd::metric; -use crate::envelope::{ContentType, Item, ItemType}; +use crate::envelope::{ContentType, ItemType}; use crate::services::outcome::{DiscardReason, Outcome}; use crate::services::processor::{ProcessEnvelopeState, ProcessingError}; use crate::statsd::RelayTimers; @@ -114,8 +113,6 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), _ => ItemAction::Keep, }); - process_replays_combine_items(state)?; - Ok(()) } @@ -154,168 +151,3 @@ fn process_replay_event( Ok(replay) } - -fn process_replays_combine_items(state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { - let project_state = &state.project_state; - let combined_envelope_items = - project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); - - if combined_envelope_items { - // If this flag is enabled, combine both items into a single item, - // and remove the original items. - // The combined Item's payload is a MsgPack map with the keys - // "replay_event" and "replay_recording". - // The values are the original payloads of the items. - let envelope = &mut state.envelope_mut(); - if let Some(replay_event_item) = - envelope.take_item_by(|item| item.ty() == &ItemType::ReplayEvent) - { - if let Some(replay_recording_item) = - envelope.take_item_by(|item| item.ty() == &ItemType::ReplayRecording) - { - let mut data = Vec::new(); - let mut combined_item_payload = BTreeMap::new(); - - combined_item_payload.insert("replay_event", replay_event_item.payload().to_vec()); - combined_item_payload - .insert("replay_recording", replay_recording_item.payload().to_vec()); - - if let Err(e) = rmp_serde::encode::write(&mut data, &combined_item_payload) { - relay_log::error!( - "failed to serialize combined replay event and recording: {}", - e - ); - // TODO: what to do here? 
Drop + emit outcome? - } - - let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); - - combined_item.set_payload(ContentType::MsgPack, data); - envelope.add_item(combined_item); - } else { - envelope.add_item(replay_event_item) - } - } - } - - Ok(()) -} - -#[cfg(test)] -mod tests { - use crate::envelope::{ContentType, Envelope, Item, ItemType}; - use crate::extractors::RequestMeta; - use crate::services::processor::ProcessEnvelope; - use crate::services::processor::ProcessingGroup; - use crate::services::project::ProjectState; - use crate::testutils::create_test_processor; - use crate::utils::ManagedEnvelope; - use relay_dynamic_config::Feature; - use relay_event_schema::protocol::EventId; - use relay_sampling::evaluation::ReservoirCounters; - use relay_system::Addr; - use std::sync::Arc; - - #[tokio::test] - async fn test_replays_combined_payload() { - let processor = create_test_processor(Default::default()); - let event_id = EventId::new(); - - let dsn = "https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42" - .parse() - .unwrap(); - - let request_meta = RequestMeta::new(dsn); - let mut envelope = Envelope::from_request(Some(event_id), request_meta); - - envelope.add_item({ - let mut item = Item::new(ItemType::ReplayRecording); - item.set_payload(ContentType::OctetStream, r###"{"foo": "bar"}"###); - item - }); - - envelope.add_item({ - let mut item = Item::new(ItemType::ReplayEvent); - item.set_payload(ContentType::Json, r#"{ - "type": "replay_event", - "replay_id": "d2132d31b39445f1938d7e21b6bf0ec4", - "replay_type": "session", - "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", - "segment_id": 0, - "timestamp": 1597977777.6189718, - "replay_start_timestamp": 1597976392.6542819, - "urls": ["sentry.io"], - "dist": "1.12", - "platform": "javascript", - "environment": "production", - "release": 42, - "tags": { - "transaction": "/organizations/:orgId/performance/:eventSlug/" - }, - "sdk": { - "name": "name", - "version": "veresion" - }, - 
"user": { - "id": "123", - "username": "user", - "email": "user@site.com", - "ip_address": "192.168.11.12" - }, - "request": { - "url": null, - "headers": { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" - } - }, - "contexts": { - "trace": { - "trace_id": "4C79F60C11214EB38604F4AE0781BFB2", - "span_id": "FA90FDEAD5F74052", - "type": "trace" - }, - "replay": { - "error_sample_rate": 0.125, - "session_sample_rate": 0.5 - } - } - }"#); - item - }); - - let mut project_state = ProjectState::allowed(); - project_state - .config - .features - .0 - .insert(Feature::SessionReplay); - project_state - .config - .features - .0 - .insert(Feature::SessionReplayCombinedEnvelopeItems); - - let message = ProcessEnvelope { - envelope: ManagedEnvelope::standalone( - envelope, - Addr::dummy(), - Addr::dummy(), - ProcessingGroup::Replay, - ), - project_state: Arc::new(project_state), - sampling_project_state: None, - reservoir_counters: ReservoirCounters::default(), - }; - - let envelope_response = processor.process(message).unwrap(); - let ctx = envelope_response.envelope.unwrap(); - let new_envelope = ctx.envelope(); - - assert_eq!(new_envelope.len(), 1); - - assert_eq!( - new_envelope.items().next().unwrap().ty(), - &ItemType::CombinedReplayEventAndRecording - ); - } -} diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index fed3db5c5d..e5b8eec66a 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -194,6 +194,9 @@ impl StoreService { }; let mut attachments = Vec::new(); + let mut replay_items: Vec<&Item> = Vec::new(); + + // if self.config for item in envelope.items() { match item.ty() { @@ -244,24 +247,20 @@ impl StoreService { item, )?, ItemType::ReplayRecording => { - self.produce_replay_recording(event_id, scoping, item, start_time, retention)? 
+ self.produce_replay_recording(event_id, scoping, item, start_time, retention)?; + replay_items.push(item); } - ItemType::ReplayEvent => self.produce_replay_event( - event_id.ok_or(StoreError::NoEventId)?, - scoping.organization_id, - scoping.project_id, - start_time, - retention, - item, - )?, - ItemType::CombinedReplayEventAndRecording => self - .produce_combined_replay_event_and_recording( + ItemType::ReplayEvent => { + self.produce_replay_event( event_id.ok_or(StoreError::NoEventId)?, - scoping, - retention, + scoping.organization_id, + scoping.project_id, start_time, + retention, item, - )?, + )?; + replay_items.push(item); + } ItemType::CheckIn => self.produce_check_in( scoping.organization_id, scoping.project_id, @@ -277,6 +276,28 @@ impl StoreService { } } + println!("replay_items: {:?}", replay_items.len()); + if replay_items.len() == 2 { + let combined_replay_kafka_message = Self::extract_combined_replay_kafka_message( + event_id.ok_or(StoreError::NoEventId)?, + replay_items, + scoping, + start_time, + retention, + ); + if let Some(combined_replay_kafka_message) = combined_replay_kafka_message { + self.produce( + KafkaTopic::ReplayRecordings, + scoping.organization_id, + combined_replay_kafka_message, + )?; + metric!( + counter(RelayCounters::ProcessingMessageProduced) += 1, + event_type = "replay_recording_combined" + ); + } + } + if event_item.is_none() && attachments.is_empty() { // No event-related content. All done. 
return Ok(()); @@ -407,6 +428,42 @@ impl StoreService { attachment_iterator.chain(event_iterator) } + fn extract_combined_replay_kafka_message( + event_id: EventId, + replay_items: Vec<&Item>, + scoping: Scoping, + start_time: Instant, + retention: u16, + ) -> Option { + let mut replay_event_item = None; + let mut replay_recording_item = None; + + for item in replay_items { + match item.ty() { + ItemType::ReplayEvent => replay_event_item = Some(item), + ItemType::ReplayRecording => replay_recording_item = Some(item), + _ => {} + } + } + + match (replay_event_item, replay_recording_item) { + (Some(replay_event_item), Some(replay_recording_item)) => Some( + KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { + replay_id: event_id, + project_id: scoping.project_id, + org_id: scoping.organization_id, + key_id: scoping.key_id, + retention_days: retention, + received: UnixTimestamp::from_instant(start_time).as_secs(), + version: Some(1), + payload: replay_recording_item.payload(), + replay_event: Some(replay_event_item.payload()), + }), + ), + _ => None, + } + } + fn produce( &self, topic: KafkaTopic, @@ -802,6 +859,7 @@ impl StoreService { retention_days: retention, payload: item.payload(), version: None, + replay_event: None, }); self.produce( @@ -821,38 +879,6 @@ impl StoreService { Ok(()) } - fn produce_combined_replay_event_and_recording( - &self, - replay_id: EventId, - scoping: Scoping, - retention_days: u16, - start_time: Instant, - item: &Item, - ) -> Result<(), StoreError> { - let message = - KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { - replay_id, - project_id: scoping.project_id, - org_id: scoping.organization_id, - key_id: scoping.key_id, - retention_days, - received: UnixTimestamp::from_instant(start_time).as_secs(), - version: Some(1), - payload: item.payload(), - }); - - self.produce( - KafkaTopic::ReplayRecordings, - scoping.organization_id, - message, - )?; - - metric!( - 
counter(RelayCounters::ProcessingMessageProduced) += 1, - event_type = "replay_recording_not_chunked" - ); - Ok(()) - } fn produce_check_in( &self, organization_id: u64, @@ -1036,6 +1062,49 @@ where .serialize(serializer) } +// pub fn process_replays_combine_items( +// items: &mut Vec, +// ) -> Result<(), ProcessingError> { + +// // combine both items into a single item, +// // and remove the original items. +// // The combined Item's payload is a MsgPack map with the keys +// // "replay_event" and "replay_recording". +// // The values are the original payloads of the items. +// let envelope = &mut state.envelope_mut(); +// if let Some(replay_event_item) = +// envelope.take_item_by(|item| item.ty() == &ItemType::ReplayEvent) +// { +// if let Some(replay_recording_item) = +// envelope.take_item_by(|item| item.ty() == &ItemType::ReplayRecording) +// { +// let mut data = Vec::new(); +// let mut combined_item_payload = BTreeMap::new(); + +// combined_item_payload.insert("replay_event", replay_event_item.payload().to_vec()); +// combined_item_payload +// .insert("replay_recording", replay_recording_item.payload().to_vec()); + +// if let Err(e) = rmp_serde::encode::write(&mut data, &combined_item_payload) { +// relay_log::error!( +// "failed to serialize combined replay event and recording: {}", +// e +// ); +// // TODO: what to do here? Drop + emit outcome? +// } + +// let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); + +// combined_item.set_payload(ContentType::MsgPack, data); +// envelope.add_item(combined_item); +// } else { +// envelope.add_item(replay_event_item) +// } +// } + +// Ok(()) +// } + /// Container payload for event messages. #[derive(Debug, Serialize)] struct EventKafkaMessage { @@ -1172,6 +1241,7 @@ struct ReplayRecordingNotChunkedKafkaMessage { retention_days: u16, version: Option, payload: Bytes, + replay_event: Option, } /// User report for an event wrapped up in a message ready for consumption in Kafka. 
diff --git a/relay-server/src/utils/rate_limits.rs b/relay-server/src/utils/rate_limits.rs index b7a4464a4e..b7cdc4ba90 100644 --- a/relay-server/src/utils/rate_limits.rs +++ b/relay-server/src/utils/rate_limits.rs @@ -108,7 +108,6 @@ fn infer_event_category(item: &Item) -> Option { ItemType::Profile => None, ItemType::ReplayEvent => None, ItemType::ReplayRecording => None, - ItemType::CombinedReplayEventAndRecording => None, ItemType::ClientReport => None, ItemType::CheckIn => None, ItemType::Span => None, diff --git a/relay-server/src/utils/sizes.rs b/relay-server/src/utils/sizes.rs index af121c68cb..57bc3c9084 100644 --- a/relay-server/src/utils/sizes.rs +++ b/relay-server/src/utils/sizes.rs @@ -58,7 +58,6 @@ pub fn check_envelope_size_limits(config: &Config, envelope: &Envelope) -> Resul } // The Combined Replay Envelope isn't generated on the client so its size does not need // to be checked. - ItemType::CombinedReplayEventAndRecording => NO_LIMIT, ItemType::Profile => config.max_profile_size(), ItemType::CheckIn => config.max_check_in_size(), ItemType::UserReport => NO_LIMIT, diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index e66b7231f3..397923fe08 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -17,7 +17,6 @@ def test_replay_combined_with_processing( "config": { "features": [ "organizations:session-replay", - "organizations:session-replay-combined-envelope-items", ] } }, @@ -35,54 +34,15 @@ def test_replay_combined_with_processing( relay.send_envelope(42, envelope) + # the not-combined message will be produced first + replay_recordings_consumer.get_not_chunked_replay() combined_replay_message = replay_recordings_consumer.get_not_chunked_replay() + assert combined_replay_message["type"] == "replay_recording_not_chunked" assert combined_replay_message["replay_id"] == "515539018c9b4260a6f999572f1661ee" - assert 
combined_replay_message["version"] == 1 - payload = msgpack.unpackb(combined_replay_message["payload"]) + assert combined_replay_message["payload"] == replay_recording_bytes - replay_event = json.loads(bytes(payload["replay_event"])) + replay_event = json.loads(combined_replay_message["replay_event"]) assert replay_event["replay_id"] == "515539018c9b4260a6f999572f1661ee" - - assert bytes(payload["replay_recording"]) == replay_recording_bytes - - -# TODO: figure out behavior for this test -# def test_replay_combined_without_processing( -# mini_sentry, relay_chain, replay_recordings_consumer -# ): -# relay = relay_chain(min_relay_version="latest") -# replay_recordings_consumer = replay_recordings_consumer(timeout=10) -# replay_recording_bytes = b"{}\n[]" - -# mini_sentry.add_basic_project_config( -# 42, -# extra={ -# "config": { -# "features": [ -# "organizations:session-replay", -# "organizations:session-replay-combined-envelope-items", -# ] -# } -# }, -# ) - -# replay_id = "515539018c9b4260a6f999572f1661ee" - -# replay_event = generate_replay_sdk_event(replay_id=replay_id) - -# envelope = Envelope(headers=[["event_id", replay_id]]) -# envelope.add_item( -# Item(payload=PayloadRef(bytes=replay_recording_bytes), type="replay_recording") -# ) -# envelope.add_item(Item(payload=PayloadRef(json=replay_event), type="replay_event")) - -# relay.send_envelope(42, envelope) - -# envelope = mini_sentry.captured_events.get(timeout=20) -# assert len(envelope.items) == 1 - -# replay_event = envelope.items[0] -# assert replay_event.type == "replay_recording_not_chunked" From 6f825d4120fac8909197257a64e2f92ebe43a658 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Mon, 5 Feb 2024 17:25:50 -0800 Subject: [PATCH 14/72] fix --- relay-server/src/services/processor.rs | 7 ++- relay-server/src/services/store.rs | 49 ------------------- relay-server/src/utils/sizes.rs | 2 - tests/integration/fixtures/processing.py | 4 +- .../test_replay_combined_payload.py | 1 - 
tests/integration/test_replay_events.py | 10 ++-- tests/integration/test_replay_recordings.py | 2 - 7 files changed, 9 insertions(+), 66 deletions(-) diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index bcfa4ad28d..240d6719ac 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1249,10 +1249,9 @@ impl EnvelopeProcessorService { /// Processes replays. fn process_replays(&self, state: &mut ProcessEnvelopeState) -> Result<(), ProcessingError> { replay::process(state, &self.inner.config)?; - // if_processing!(self.inner.config, { - // // replay::process_replays_combine_items(state)?; - // self.enforce_quotas(state)?; - // }); + if_processing!(self.inner.config, { + self.enforce_quotas(state)?; + }); Ok(()) } diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index e5b8eec66a..ef22f084bb 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -196,8 +196,6 @@ impl StoreService { let mut attachments = Vec::new(); let mut replay_items: Vec<&Item> = Vec::new(); - // if self.config - for item in envelope.items() { match item.ty() { ItemType::Attachment => { @@ -276,7 +274,6 @@ impl StoreService { } } - println!("replay_items: {:?}", replay_items.len()); if replay_items.len() == 2 { let combined_replay_kafka_message = Self::extract_combined_replay_kafka_message( event_id.ok_or(StoreError::NoEventId)?, @@ -455,7 +452,6 @@ impl StoreService { key_id: scoping.key_id, retention_days: retention, received: UnixTimestamp::from_instant(start_time).as_secs(), - version: Some(1), payload: replay_recording_item.payload(), replay_event: Some(replay_event_item.payload()), }), @@ -858,7 +854,6 @@ impl StoreService { received: UnixTimestamp::from_instant(start_time).as_secs(), retention_days: retention, payload: item.payload(), - version: None, replay_event: None, }); @@ -1062,49 +1057,6 @@ where .serialize(serializer) } -// pub 
fn process_replays_combine_items( -// items: &mut Vec, -// ) -> Result<(), ProcessingError> { - -// // combine both items into a single item, -// // and remove the original items. -// // The combined Item's payload is a MsgPack map with the keys -// // "replay_event" and "replay_recording". -// // The values are the original payloads of the items. -// let envelope = &mut state.envelope_mut(); -// if let Some(replay_event_item) = -// envelope.take_item_by(|item| item.ty() == &ItemType::ReplayEvent) -// { -// if let Some(replay_recording_item) = -// envelope.take_item_by(|item| item.ty() == &ItemType::ReplayRecording) -// { -// let mut data = Vec::new(); -// let mut combined_item_payload = BTreeMap::new(); - -// combined_item_payload.insert("replay_event", replay_event_item.payload().to_vec()); -// combined_item_payload -// .insert("replay_recording", replay_recording_item.payload().to_vec()); - -// if let Err(e) = rmp_serde::encode::write(&mut data, &combined_item_payload) { -// relay_log::error!( -// "failed to serialize combined replay event and recording: {}", -// e -// ); -// // TODO: what to do here? Drop + emit outcome? -// } - -// let mut combined_item = Item::new(ItemType::CombinedReplayEventAndRecording); - -// combined_item.set_payload(ContentType::MsgPack, data); -// envelope.add_item(combined_item); -// } else { -// envelope.add_item(replay_event_item) -// } -// } - -// Ok(()) -// } - /// Container payload for event messages. 
#[derive(Debug, Serialize)] struct EventKafkaMessage { @@ -1239,7 +1191,6 @@ struct ReplayRecordingNotChunkedKafkaMessage { project_id: ProjectId, received: u64, retention_days: u16, - version: Option, payload: Bytes, replay_event: Option, } diff --git a/relay-server/src/utils/sizes.rs b/relay-server/src/utils/sizes.rs index 57bc3c9084..cf02d1c584 100644 --- a/relay-server/src/utils/sizes.rs +++ b/relay-server/src/utils/sizes.rs @@ -56,8 +56,6 @@ pub fn check_envelope_size_limits(config: &Config, envelope: &Envelope) -> Resul client_reports_size += item.len(); NO_LIMIT } - // The Combined Replay Envelope isn't generated on the client so its size does not need - // to be checked. ItemType::Profile => config.max_profile_size(), ItemType::CheckIn => config.max_check_in_size(), ItemType::UserReport => NO_LIMIT, diff --git a/tests/integration/fixtures/processing.py b/tests/integration/fixtures/processing.py index 1926c55ce6..cc15d6ea00 100644 --- a/tests/integration/fixtures/processing.py +++ b/tests/integration/fixtures/processing.py @@ -288,9 +288,7 @@ def metrics_consumer(kafka_consumer): @pytest.fixture def replay_recordings_consumer(kafka_consumer): - return lambda timeout=None: ReplayRecordingsConsumer( - timeout=timeout, *kafka_consumer("replay_recordings") - ) + return lambda: ReplayRecordingsConsumer(*kafka_consumer("replay_recordings")) @pytest.fixture diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index 397923fe08..7c1562f9b3 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -1,5 +1,4 @@ from sentry_sdk.envelope import Envelope, Item, PayloadRef -import msgpack from .test_replay_events import generate_replay_sdk_event import json diff --git a/tests/integration/test_replay_events.py b/tests/integration/test_replay_events.py index cbada0b030..e4105cf4a8 100644 --- a/tests/integration/test_replay_events.py +++ 
b/tests/integration/test_replay_events.py @@ -2,12 +2,12 @@ import uuid -def generate_replay_sdk_event(replay_id="d2132d31b39445f1938d7e21b6bf0ec4"): +def generate_replay_sdk_event(): return { "type": "replay_event", - "replay_id": replay_id, + "replay_id": "d2132d31b39445f1938d7e21b6bf0ec4", "replay_type": "session", - "event_id": replay_id, + "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", "segment_id": 0, "timestamp": 1597977777.6189718, "replay_start_timestamp": 1597976392.6542819, @@ -29,7 +29,7 @@ def generate_replay_sdk_event(replay_id="d2132d31b39445f1938d7e21b6bf0ec4"): "request": { "url": None, "headers": { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" + "user-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" }, }, "contexts": { @@ -97,7 +97,7 @@ def test_replay_event_with_processing( # Assert the tags and requests objects were normalized to lists of doubles. assert parsed_replay["tags"] == [["transaction", replay["tags"]["transaction"]]] assert parsed_replay["request"] == { - "headers": [["User-Agent", replay["request"]["headers"]["User-Agent"]]] + "headers": [["User-Agent", replay["request"]["headers"]["user-Agent"]]] } # Assert contexts object was pulled out. 
diff --git a/tests/integration/test_replay_recordings.py b/tests/integration/test_replay_recordings.py index 86b12fe207..fcb195c0fb 100644 --- a/tests/integration/test_replay_recordings.py +++ b/tests/integration/test_replay_recordings.py @@ -94,7 +94,6 @@ def test_chunked_replay_recordings_processing( assert replay_recording["key_id"] == 123 assert replay_recording["retention_days"] == 90 assert replay_recording["received"] - assert type(replay_recording["received"]) == int def test_nonchunked_replay_recordings_processing( @@ -130,7 +129,6 @@ def test_nonchunked_replay_recordings_processing( assert replay_recording["project_id"] == project_id assert replay_recording["key_id"] == 123 assert replay_recording["org_id"] == org_id - assert type(replay_recording["received"]) == int assert replay_recording["retention_days"] == 90 assert replay_recording["payload"] == payload assert replay_recording["type"] == "replay_recording_not_chunked" From aa4d3beddefd324b46fcf83422d525050a47b633 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Mon, 5 Feb 2024 17:27:12 -0800 Subject: [PATCH 15/72] fix merge --- tests/integration/fixtures/__init__.py | 9 +-------- tests/integration/test_replay_recordings.py | 2 ++ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index 83b3d3a1dc..1eb7ce3420 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -279,14 +279,7 @@ def send_transaction( self.send_envelope(project_id, envelope) def send_replay_event(self, project_id, payload, item_headers=None): - envelope = Envelope( - headers=[ - [ - "event_id", - payload["replay_id"], - ] - ] - ) + envelope = Envelope() envelope.add_item(Item(payload=PayloadRef(json=payload), type="replay_event")) if envelope.headers is None: envelope.headers = {} diff --git a/tests/integration/test_replay_recordings.py b/tests/integration/test_replay_recordings.py index 
fcb195c0fb..86b12fe207 100644 --- a/tests/integration/test_replay_recordings.py +++ b/tests/integration/test_replay_recordings.py @@ -94,6 +94,7 @@ def test_chunked_replay_recordings_processing( assert replay_recording["key_id"] == 123 assert replay_recording["retention_days"] == 90 assert replay_recording["received"] + assert type(replay_recording["received"]) == int def test_nonchunked_replay_recordings_processing( @@ -129,6 +130,7 @@ def test_nonchunked_replay_recordings_processing( assert replay_recording["project_id"] == project_id assert replay_recording["key_id"] == 123 assert replay_recording["org_id"] == org_id + assert type(replay_recording["received"]) == int assert replay_recording["retention_days"] == 90 assert replay_recording["payload"] == payload assert replay_recording["type"] == "replay_recording_not_chunked" From 8d06df8bf501e9a5da1b69b016136b372296873f Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Mon, 5 Feb 2024 17:28:39 -0800 Subject: [PATCH 16/72] more cleanup --- relay-server/src/services/store.rs | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index ef22f084bb..ad96155c84 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -1134,23 +1134,6 @@ struct ReplayRecordingChunkKafkaMessage { /// the tuple (id, chunk_index) is the unique identifier for a single chunk. chunk_index: usize, } -#[derive(Debug, Serialize)] -struct CombinedReplayEventAndRecordingKafkaMessage { - /// Raw event payload. - payload: Bytes, - /// The event id. - replay_id: EventId, - /// The project id for the current event. - project_id: ProjectId, - /// The project id for the current event. - org_id: u64, - /// The timestamp of when the recording was Received by relay - received: u64, - version: u8, - // Number of days to retain. 
- retention_days: u16, -} - #[derive(Debug, Serialize)] struct ReplayRecordingChunkMeta { /// The attachment ID within the event. From d8662fa2f19631e678596873fd035493bdb92c53 Mon Sep 17 00:00:00 2001 From: Josh Ferge Date: Mon, 5 Feb 2024 17:28:54 -0800 Subject: [PATCH 17/72] Update CHANGELOG.md Co-authored-by: Joris Bayer --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26d5f2c43b..76882bc8a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ - Emit a usage metric for total spans. ([#3007](https://github.com/getsentry/relay/pull/3007)) - Drop spans ending outside the valid timestamp range. ([#3013](https://github.com/getsentry/relay/pull/3013)) -- Adds support for combining replay envelope items ([#3035](https://github.com/getsentry/relay/pull/3035)) +- Add support for combining replay envelope items. ([#3035](https://github.com/getsentry/relay/pull/3035)) ## 24.1.1 From 91eeda55d97266c8728dc0f66e2c8748744bd7a1 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Mon, 5 Feb 2024 17:29:28 -0800 Subject: [PATCH 18/72] remove feature --- relay-dynamic-config/src/feature.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/relay-dynamic-config/src/feature.rs b/relay-dynamic-config/src/feature.rs index 12d30ffb97..803d047b55 100644 --- a/relay-dynamic-config/src/feature.rs +++ b/relay-dynamic-config/src/feature.rs @@ -11,10 +11,6 @@ pub enum Feature { /// Enables data scrubbing of replay recording payloads. #[serde(rename = "organizations:session-replay-recording-scrubbing")] SessionReplayRecordingScrubbing, - /// Enables combining session replay envelope item (Replay Recordings and Replay Events). - /// into one item. - #[serde(rename = "organizations:session-replay-combined-envelope-items")] - SessionReplayCombinedEnvelopeItems, /// Enables new User Feedback ingest. /// /// TODO(jferg): rename to UserFeedbackIngest once old UserReport logic is deprecated. 
From 848a51323e12715e2e38f15ffbfe3c87b4f33cf8 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 8 Feb 2024 14:40:46 -0800 Subject: [PATCH 19/72] use flag in header --- relay-dynamic-config/src/feature.rs | 4 + relay-server/src/envelope.rs | 16 ++++ relay-server/src/services/processor/replay.rs | 20 ++++- relay-server/src/services/store.rs | 83 +++++++++---------- .../test_replay_combined_payload.py | 11 +-- 5 files changed, 84 insertions(+), 50 deletions(-) diff --git a/relay-dynamic-config/src/feature.rs b/relay-dynamic-config/src/feature.rs index 803d047b55..12d30ffb97 100644 --- a/relay-dynamic-config/src/feature.rs +++ b/relay-dynamic-config/src/feature.rs @@ -11,6 +11,10 @@ pub enum Feature { /// Enables data scrubbing of replay recording payloads. #[serde(rename = "organizations:session-replay-recording-scrubbing")] SessionReplayRecordingScrubbing, + /// Enables combining session replay envelope items (Replay Recordings and Replay Events) + /// into one item. + #[serde(rename = "organizations:session-replay-combined-envelope-items")] + SessionReplayCombinedEnvelopeItems, /// Enables new User Feedback ingest. /// /// TODO(jferg): rename to UserFeedbackIngest once old UserReport logic is deprecated. diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 31f26e219f..9406973e20 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -489,6 +489,11 @@ pub struct ItemHeaders { #[serde(default, skip)] rate_limited: bool, + /// Indicates that this item should be combined into one payload with the other replay item. + /// NOTE: This is internal-only and not exposed into the Envelope. + #[serde(default, skip)] + replay_combined_payload: bool, + /// Contains the amount of events this item was generated and aggregated from.
/// /// A [metrics buckets](`ItemType::MetricBuckets`) item contains metrics extracted and @@ -576,6 +581,7 @@ impl Item { filename: None, routing_hint: None, rate_limited: false, + replay_combined_payload: false, source_quantities: None, sample_rates: None, other: BTreeMap::new(), @@ -739,6 +745,16 @@ impl Item { self.headers.source_quantities = Some(source_quantities); } + /// Returns whether this item should be combined with the other replay item into one payload. + pub fn replay_combined_payload(&self) -> bool { + self.headers.replay_combined_payload + } + + /// Sets whether this item should be combined with the other replay item into one payload. + pub fn set_replay_combined_payload(&mut self, combined_payload: bool) { + self.headers.replay_combined_payload = combined_payload; + } + /// Sets sample rates for this item. pub fn set_sample_rates(&mut self, sample_rates: Value) { if matches!(sample_rates, Value::Array(ref a) if !a.is_empty()) { diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 8831f4fba6..c93e3f3ef7 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -48,20 +48,29 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), user_agent: meta.user_agent(), client_hints: meta.client_hints().as_deref(), }; + let combined_envelope_items = + project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); state.managed_envelope.retain_items(|item| match item.ty() { ItemType::ReplayEvent => { if !replays_enabled { + println!("1"); return ItemAction::DropSilently; } + if combined_envelope_items { + println!("2"); + item.set_replay_combined_payload(true); + } match process_replay_event(&item.payload(), project_config, client_addr, user_agent) { Ok(replay) => match replay.to_json() { Ok(json) => { + println!("3"); item.set_payload(ContentType::Json, json); ItemAction::Keep } Err(error) => { + println!("4"); relay_log::error!( error = &error as &dyn Error, "failed to serialize replay" @@ -70,6 +79,7 @@ pub fn
process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), } }, Err(error) => { + println!("5"); relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); ItemAction::Drop(Outcome::Invalid(match error { ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, @@ -82,12 +92,18 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), } ItemType::ReplayRecording => { if !replays_enabled { + println!("6"); return ItemAction::DropSilently; } + if combined_envelope_items { + println!("7"); + item.set_header("replay_combined_payload", true); + } // XXX: Processing is there just for data scrubbing. Skip the entire expensive // processing step if we do not need to scrub. if !scrubbing_enabled || scrubber.is_empty() { + println!("8"); return ItemAction::Keep; } @@ -101,10 +117,12 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), match parsed_recording { Ok(recording) => { + println!("9"); item.set_payload(ContentType::OctetStream, recording); ItemAction::Keep } Err(e) => { + println!("10"); relay_log::warn!("replay-recording-event: {e} {event_id:?}"); ItemAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayRecordingEvent)) } @@ -148,6 +166,6 @@ fn process_replay_event( processor::process_value(&mut replay, &mut processor, ProcessingState::root()) .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; } - + println!("11"); Ok(replay) } diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index ad96155c84..ac5558cc60 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -172,6 +172,8 @@ impl StoreService { start_time: Instant, scoping: Scoping, ) -> Result<(), StoreError> { + println!("20"); + let retention = envelope.retention(); let client = envelope.meta().client(); let event_id = envelope.event_id(); @@ -194,7 +196,10 @@ impl StoreService { }; let mut attachments = Vec::new(); - let mut 
replay_items: Vec<&Item> = Vec::new(); + + let mut replay_event = None; + let mut replay_recording = None; + println!("HERE??????????"); for item in envelope.items() { match item.ty() { @@ -245,8 +250,12 @@ impl StoreService { item, )?, ItemType::ReplayRecording => { + if item.replay_combined_payload() { + replay_recording = Some(item); + } + println!("HERE??????????2"); + self.produce_replay_recording(event_id, scoping, item, start_time, retention)?; - replay_items.push(item); } ItemType::ReplayEvent => { self.produce_replay_event( @@ -257,7 +266,9 @@ impl StoreService { retention, item, )?; - replay_items.push(item); + if item.replay_combined_payload() { + replay_event = Some(item); + } } ItemType::CheckIn => self.produce_check_in( scoping.organization_id, @@ -274,25 +285,24 @@ impl StoreService { } } - if replay_items.len() == 2 { + if let (Some(replay_event), Some(replay_recording)) = (replay_event, replay_recording) { let combined_replay_kafka_message = Self::extract_combined_replay_kafka_message( event_id.ok_or(StoreError::NoEventId)?, - replay_items, + replay_event, + replay_recording, scoping, start_time, retention, ); - if let Some(combined_replay_kafka_message) = combined_replay_kafka_message { - self.produce( - KafkaTopic::ReplayRecordings, - scoping.organization_id, - combined_replay_kafka_message, - )?; - metric!( - counter(RelayCounters::ProcessingMessageProduced) += 1, - event_type = "replay_recording_combined" - ); - } + self.produce( + KafkaTopic::ReplayRecordings, + scoping.organization_id, + KafkaMessage::ReplayRecordingNotChunked(combined_replay_kafka_message), + )?; + metric!( + counter(RelayCounters::ProcessingMessageProduced) += 1, + event_type = "replay_recording_combined" + ); } if event_item.is_none() && attachments.is_empty() { @@ -427,37 +437,22 @@ impl StoreService { fn extract_combined_replay_kafka_message( event_id: EventId, - replay_items: Vec<&Item>, + replay_event: &Item, + replay_recording: &Item, scoping: Scoping, start_time: 
Instant, retention: u16, - ) -> Option { - let mut replay_event_item = None; - let mut replay_recording_item = None; - - for item in replay_items { - match item.ty() { - ItemType::ReplayEvent => replay_event_item = Some(item), - ItemType::ReplayRecording => replay_recording_item = Some(item), - _ => {} - } - } - - match (replay_event_item, replay_recording_item) { - (Some(replay_event_item), Some(replay_recording_item)) => Some( - KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { - replay_id: event_id, - project_id: scoping.project_id, - org_id: scoping.organization_id, - key_id: scoping.key_id, - retention_days: retention, - received: UnixTimestamp::from_instant(start_time).as_secs(), - payload: replay_recording_item.payload(), - replay_event: Some(replay_event_item.payload()), - }), - ), - _ => None, - } + ) -> ReplayRecordingNotChunkedKafkaMessage { + return ReplayRecordingNotChunkedKafkaMessage { + replay_id: event_id, + project_id: scoping.project_id, + org_id: scoping.organization_id, + key_id: scoping.key_id, + retention_days: retention, + received: UnixTimestamp::from_instant(start_time).as_secs(), + payload: replay_recording.payload(), + replay_event: Some(replay_event.payload()), + }; } fn produce( diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index 7c1562f9b3..c26cadb8a9 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -8,7 +8,7 @@ def test_replay_combined_with_processing( ): replay_recording_bytes = b"{}\n[]" relay = relay_with_processing() - replay_recordings_consumer = replay_recordings_consumer(timeout=10) + replay_recordings_consumer = replay_recordings_consumer() mini_sentry.add_basic_project_config( 42, @@ -16,14 +16,15 @@ def test_replay_combined_with_processing( "config": { "features": [ "organizations:session-replay", + "organizations:session-replay-combined-envelope-items", 
] } }, ) - replay_id = "515539018c9b4260a6f999572f1661ee" + replay_id = "d2132d31b39445f1938d7e21b6bf0ec4" - replay_event = generate_replay_sdk_event(replay_id=replay_id) + replay_event = generate_replay_sdk_event() envelope = Envelope(headers=[["event_id", replay_id]]) envelope.add_item( @@ -38,10 +39,10 @@ def test_replay_combined_with_processing( combined_replay_message = replay_recordings_consumer.get_not_chunked_replay() assert combined_replay_message["type"] == "replay_recording_not_chunked" - assert combined_replay_message["replay_id"] == "515539018c9b4260a6f999572f1661ee" + assert combined_replay_message["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" assert combined_replay_message["payload"] == replay_recording_bytes replay_event = json.loads(combined_replay_message["replay_event"]) - assert replay_event["replay_id"] == "515539018c9b4260a6f999572f1661ee" + assert replay_event["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" From 0540710868a918460f9502d15bd81205aec7834e Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 8 Feb 2024 15:13:58 -0800 Subject: [PATCH 20/72] try to debug --- .../src/normalize/user_agent.rs | 6 +++ relay-event-normalization/src/replay.rs | 15 +++++- relay-server/src/services/processor.rs | 1 + relay-server/src/services/processor/replay.rs | 11 +++- .../test_replay_combined_payload.py | 52 +++++++++---------- tests/integration/test_replay_events.py | 6 +-- 6 files changed, 59 insertions(+), 32 deletions(-) diff --git a/relay-event-normalization/src/normalize/user_agent.rs b/relay-event-normalization/src/normalize/user_agent.rs index 2ffc44c210..43fd5f16f4 100644 --- a/relay-event-normalization/src/normalize/user_agent.rs +++ b/relay-event-normalization/src/normalize/user_agent.rs @@ -46,17 +46,21 @@ pub fn normalize_user_agent_info_generic( platform: &Annotated, user_agent_info: &RawUserAgentInfo<&str>, ) { + println!("60"); if !contexts.contains::() { if let Some(browser_context) = 
BrowserContext::from_hints_or_ua(user_agent_info) { + println!("62"); contexts.add(browser_context); } } + println!("61"); if !contexts.contains::() { if let Some(device_context) = DeviceContext::from_hints_or_ua(user_agent_info) { contexts.add(device_context); } } + println!("62"); // avoid conflicts with OS-context sent by a serverside SDK by using `contexts.client_os` // instead of `contexts.os`. This is then preferred by the UI to show alongside device and @@ -68,9 +72,11 @@ pub fn normalize_user_agent_info_generic( Some("javascript") => OsContext::default_key(), _ => "client_os", }; + println!("63"); if !contexts.contains_key(os_context_key) { if let Some(os_context) = OsContext::from_hints_or_ua(user_agent_info) { + println!("64"); contexts.insert(os_context_key.to_owned(), Context::Os(Box::new(os_context))); } } diff --git a/relay-event-normalization/src/replay.rs b/relay-event-normalization/src/replay.rs index 8d2d294bc5..5f9eca148d 100644 --- a/relay-event-normalization/src/replay.rs +++ b/relay-event-normalization/src/replay.rs @@ -88,11 +88,17 @@ pub fn normalize( client_ip: Option, user_agent: &RawUserAgentInfo<&str>, ) { + println!("40"); normalize_platform(replay); + println!("41"); normalize_ip_address(replay, client_ip); + println!("42"); normalize_user_agent(replay, user_agent); + println!("43"); normalize_type(replay); + println!("44"); normalize_array_fields(replay); + println!("45"); } fn normalize_array_fields(replay: &mut Replay) { @@ -121,6 +127,7 @@ fn normalize_ip_address(replay: &mut Replay, ip_address: Option) { } fn normalize_user_agent(replay: &mut Replay, default_user_agent: &RawUserAgentInfo<&str>) { + println!("50"); let headers = match replay .request .value() @@ -129,16 +136,20 @@ fn normalize_user_agent(replay: &mut Replay, default_user_agent: &RawUserAgentIn Some(headers) => headers, None => return, }; + println!("51"); let user_agent_info = RawUserAgentInfo::from_headers(headers); - + println!("52"); let user_agent_info = if 
user_agent_info.is_empty() { + println!("53"); default_user_agent } else { + println!("54"); &user_agent_info }; - + println!("55"); let contexts = replay.contexts.get_or_insert_with(Contexts::new); + println!("56"); user_agent::normalize_user_agent_info_generic(contexts, &replay.platform, user_agent_info); } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index ccf1e8ae0e..2170dcae20 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1365,6 +1365,7 @@ impl EnvelopeProcessorService { }) } Err(err) => { + println!("error: {:?}", err); if let Some(outcome) = err.to_outcome() { state.managed_envelope.reject(outcome); } diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index c93e3f3ef7..56dd14f900 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -131,6 +131,7 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), _ => ItemAction::Keep, }); + println!("100"); Ok(()) } @@ -141,17 +142,24 @@ fn process_replay_event( client_ip: Option, user_agent: &RawUserAgentInfo<&str>, ) -> Result, ReplayError> { + println!("30"); let mut replay = Annotated::::from_json_bytes(payload).map_err(ReplayError::CouldNotParse)?; if let Some(replay_value) = replay.value_mut() { + println!("31"); replay::validate(replay_value)?; + println!("31.5"); replay::normalize(replay_value, client_ip, user_agent); + println!("31.8"); } else { + println!("32"); return Err(ReplayError::NoContent); } + println!("32.5"); if let Some(ref config) = config.pii_config { + println!("33"); let mut processor = PiiProcessor::new(config.compiled()); processor::process_value(&mut replay, &mut processor, ProcessingState::root()) .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; @@ -162,10 +170,11 @@ fn process_replay_event( .pii_config() .map_err(|e| 
ReplayError::CouldNotScrub(e.to_string()))?; if let Some(config) = pii_config { + println!("34"); let mut processor = PiiProcessor::new(config.compiled()); processor::process_value(&mut replay, &mut processor, ProcessingState::root()) .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; } - println!("11"); + println!("40"); Ok(replay) } diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index c26cadb8a9..2d230a5e45 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -1,4 +1,6 @@ from sentry_sdk.envelope import Envelope, Item, PayloadRef + +from .test_replay_recordings import recording_payload from .test_replay_events import generate_replay_sdk_event import json @@ -6,43 +8,41 @@ def test_replay_combined_with_processing( mini_sentry, relay_with_processing, replay_recordings_consumer ): - replay_recording_bytes = b"{}\n[]" + project_id = 42 + org_id = 0 + replay_id = "515539018c9b4260a6f999572f1661ee" relay = relay_with_processing() - replay_recordings_consumer = replay_recordings_consumer() - mini_sentry.add_basic_project_config( - 42, - extra={ - "config": { - "features": [ - "organizations:session-replay", - "organizations:session-replay-combined-envelope-items", - ] - } - }, + project_id, extra={"config": {"features": ["organizations:session-replay"]}} ) + replay_recordings_consumer = replay_recordings_consumer() - replay_id = "d2132d31b39445f1938d7e21b6bf0ec4" - - replay_event = generate_replay_sdk_event() - - envelope = Envelope(headers=[["event_id", replay_id]]) - envelope.add_item( - Item(payload=PayloadRef(bytes=replay_recording_bytes), type="replay_recording") + envelope = Envelope( + headers=[ + [ + "event_id", + replay_id, + ], + ["attachment_type", "replay_recording"], + ] ) + payload = recording_payload(b"[]") + envelope.add_item(Item(payload=PayloadRef(bytes=payload), type="replay_recording")) + + replay_event = 
generate_replay_sdk_event(replay_id=replay_id) envelope.add_item(Item(payload=PayloadRef(json=replay_event), type="replay_event")) - relay.send_envelope(42, envelope) + relay.send_envelope(project_id, envelope) # the not-combined message will be produced first replay_recordings_consumer.get_not_chunked_replay() - combined_replay_message = replay_recordings_consumer.get_not_chunked_replay() + # combined_replay_message = replay_recordings_consumer.get_not_chunked_replay() - assert combined_replay_message["type"] == "replay_recording_not_chunked" - assert combined_replay_message["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" + # assert combined_replay_message["type"] == "replay_recording_not_chunked" + # assert combined_replay_message["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" - assert combined_replay_message["payload"] == replay_recording_bytes + # assert combined_replay_message["payload"] == replay_recording_bytes - replay_event = json.loads(combined_replay_message["replay_event"]) + # replay_event = json.loads(combined_replay_message["replay_event"]) - assert replay_event["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" + # assert replay_event["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" diff --git a/tests/integration/test_replay_events.py b/tests/integration/test_replay_events.py index e4105cf4a8..820061780a 100644 --- a/tests/integration/test_replay_events.py +++ b/tests/integration/test_replay_events.py @@ -2,12 +2,12 @@ import uuid -def generate_replay_sdk_event(): +def generate_replay_sdk_event(replay_id="d2132d31b39445f1938d7e21b6bf0ec4"): return { "type": "replay_event", - "replay_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "replay_id": replay_id, "replay_type": "session", - "event_id": "d2132d31b39445f1938d7e21b6bf0ec4", + "event_id": replay_id, "segment_id": 0, "timestamp": 1597977777.6189718, "replay_start_timestamp": 1597976392.6542819, From 562c5efc7853e68c5618d5777760ceb5a5419b79 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 
8 Feb 2024 15:49:42 -0800 Subject: [PATCH 21/72] tests working --- .../src/normalize/user_agent.rs | 9 -------- relay-event-normalization/src/replay.rs | 13 ----------- relay-server/src/services/processor.rs | 2 -- relay-server/src/services/processor/replay.rs | 22 +------------------ relay-server/src/services/store.rs | 9 ++++---- relay-ua/src/lib.rs | 1 + tests/integration/fixtures/processing.py | 2 +- .../test_replay_combined_payload.py | 22 +++++++++++++------ 8 files changed, 23 insertions(+), 57 deletions(-) diff --git a/relay-event-normalization/src/normalize/user_agent.rs b/relay-event-normalization/src/normalize/user_agent.rs index 43fd5f16f4..d95d77513c 100644 --- a/relay-event-normalization/src/normalize/user_agent.rs +++ b/relay-event-normalization/src/normalize/user_agent.rs @@ -46,21 +46,17 @@ pub fn normalize_user_agent_info_generic( platform: &Annotated, user_agent_info: &RawUserAgentInfo<&str>, ) { - println!("60"); if !contexts.contains::() { if let Some(browser_context) = BrowserContext::from_hints_or_ua(user_agent_info) { - println!("62"); contexts.add(browser_context); } } - println!("61"); if !contexts.contains::() { if let Some(device_context) = DeviceContext::from_hints_or_ua(user_agent_info) { contexts.add(device_context); } } - println!("62"); // avoid conflicts with OS-context sent by a serverside SDK by using `contexts.client_os` // instead of `contexts.os`. 
This is then preferred by the UI to show alongside device and @@ -72,11 +68,8 @@ pub fn normalize_user_agent_info_generic( Some("javascript") => OsContext::default_key(), _ => "client_os", }; - println!("63"); - if !contexts.contains_key(os_context_key) { if let Some(os_context) = OsContext::from_hints_or_ua(user_agent_info) { - println!("64"); contexts.insert(os_context_key.to_owned(), Context::Os(Box::new(os_context))); } } @@ -337,7 +330,6 @@ impl FromUserAgentInfo for DeviceContext { impl FromUserAgentInfo for BrowserContext { fn parse_client_hints(client_hints: &ClientHints<&str>) -> Option { let (browser, version) = browser_from_client_hints(client_hints.sec_ch_ua?)?; - Some(Self { name: Annotated::new(browser), version: Annotated::new(version), @@ -347,7 +339,6 @@ impl FromUserAgentInfo for BrowserContext { fn parse_user_agent(user_agent: &str) -> Option { let browser = relay_ua::parse_user_agent(user_agent); - if !is_known(&browser.family) { return None; } diff --git a/relay-event-normalization/src/replay.rs b/relay-event-normalization/src/replay.rs index 5f9eca148d..86b40bd81c 100644 --- a/relay-event-normalization/src/replay.rs +++ b/relay-event-normalization/src/replay.rs @@ -88,17 +88,11 @@ pub fn normalize( client_ip: Option, user_agent: &RawUserAgentInfo<&str>, ) { - println!("40"); normalize_platform(replay); - println!("41"); normalize_ip_address(replay, client_ip); - println!("42"); normalize_user_agent(replay, user_agent); - println!("43"); normalize_type(replay); - println!("44"); normalize_array_fields(replay); - println!("45"); } fn normalize_array_fields(replay: &mut Replay) { @@ -127,7 +121,6 @@ fn normalize_ip_address(replay: &mut Replay, ip_address: Option) { } fn normalize_user_agent(replay: &mut Replay, default_user_agent: &RawUserAgentInfo<&str>) { - println!("50"); let headers = match replay .request .value() @@ -136,20 +129,14 @@ fn normalize_user_agent(replay: &mut Replay, default_user_agent: &RawUserAgentIn Some(headers) => headers, 
None => return, }; - println!("51"); let user_agent_info = RawUserAgentInfo::from_headers(headers); - println!("52"); let user_agent_info = if user_agent_info.is_empty() { - println!("53"); default_user_agent } else { - println!("54"); &user_agent_info }; - println!("55"); let contexts = replay.contexts.get_or_insert_with(Contexts::new); - println!("56"); user_agent::normalize_user_agent_info_generic(contexts, &replay.platform, user_agent_info); } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 2170dcae20..31c813fa39 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1311,7 +1311,6 @@ impl EnvelopeProcessorService { // This will later be forwarded to upstream. ProcessingGroup::ForwardUnknown => (), } - Ok(()) } @@ -1365,7 +1364,6 @@ impl EnvelopeProcessorService { }) } Err(err) => { - println!("error: {:?}", err); if let Some(outcome) = err.to_outcome() { state.managed_envelope.reject(outcome); } diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 56dd14f900..a392d047ff 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -54,23 +54,19 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), state.managed_envelope.retain_items(|item| match item.ty() { ItemType::ReplayEvent => { if !replays_enabled { - println!("1"); return ItemAction::DropSilently; } if combined_envelope_items { - println!("2"); item.set_replay_combined_payload(true); } match process_replay_event(&item.payload(), project_config, client_addr, user_agent) { Ok(replay) => match replay.to_json() { Ok(json) => { - println!("3"); item.set_payload(ContentType::Json, json); ItemAction::Keep } Err(error) => { - println!("4"); relay_log::error!( error = &error as &dyn Error, "failed to serialize replay" @@ -79,7 +75,6 @@ pub fn process(state: &mut 
ProcessEnvelopeState, config: &Config) -> Result<(), } }, Err(error) => { - println!("5"); relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); ItemAction::Drop(Outcome::Invalid(match error { ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, @@ -92,18 +87,15 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), } ItemType::ReplayRecording => { if !replays_enabled { - println!("6"); return ItemAction::DropSilently; } if combined_envelope_items { - println!("7"); - item.set_header("replay_combined_payload", true); + item.set_replay_combined_payload(true); } // XXX: Processing is there just for data scrubbing. Skip the entire expensive // processing step if we do not need to scrub. if !scrubbing_enabled || scrubber.is_empty() { - println!("8"); return ItemAction::Keep; } @@ -117,12 +109,10 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), match parsed_recording { Ok(recording) => { - println!("9"); item.set_payload(ContentType::OctetStream, recording); ItemAction::Keep } Err(e) => { - println!("10"); relay_log::warn!("replay-recording-event: {e} {event_id:?}"); ItemAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayRecordingEvent)) } @@ -131,7 +121,6 @@ pub fn process(state: &mut ProcessEnvelopeState, config: &Config) -> Result<(), _ => ItemAction::Keep, }); - println!("100"); Ok(()) } @@ -142,24 +131,17 @@ fn process_replay_event( client_ip: Option, user_agent: &RawUserAgentInfo<&str>, ) -> Result, ReplayError> { - println!("30"); let mut replay = Annotated::::from_json_bytes(payload).map_err(ReplayError::CouldNotParse)?; if let Some(replay_value) = replay.value_mut() { - println!("31"); replay::validate(replay_value)?; - println!("31.5"); replay::normalize(replay_value, client_ip, user_agent); - println!("31.8"); } else { - println!("32"); return Err(ReplayError::NoContent); } - println!("32.5"); if let Some(ref config) = config.pii_config { - println!("33"); 
let mut processor = PiiProcessor::new(config.compiled()); processor::process_value(&mut replay, &mut processor, ProcessingState::root()) .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; @@ -170,11 +152,9 @@ fn process_replay_event( .pii_config() .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; if let Some(config) = pii_config { - println!("34"); let mut processor = PiiProcessor::new(config.compiled()); processor::process_value(&mut replay, &mut processor, ProcessingState::root()) .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; } - println!("40"); Ok(replay) } diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index ac5558cc60..db3a5b6d10 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -172,8 +172,6 @@ impl StoreService { start_time: Instant, scoping: Scoping, ) -> Result<(), StoreError> { - println!("20"); - let retention = envelope.retention(); let client = envelope.meta().client(); let event_id = envelope.event_id(); @@ -199,7 +197,6 @@ impl StoreService { let mut replay_event = None; let mut replay_recording = None; - println!("HERE??????????"); for item in envelope.items() { match item.ty() { @@ -250,10 +247,11 @@ impl StoreService { item, )?, ItemType::ReplayRecording => { + println!("0"); if item.replay_combined_payload() { + println!("1"); replay_recording = Some(item); } - println!("HERE??????????2"); self.produce_replay_recording(event_id, scoping, item, start_time, retention)?; } @@ -284,6 +282,9 @@ impl StoreService { _ => {} } } + println!("here?"); + println!("event: {:?}", replay_event); + println!("rec: {:?}", replay_recording); if let (Some(replay_event), Some(replay_recording)) = (replay_event, replay_recording) { let combined_replay_kafka_message = Self::extract_combined_replay_kafka_message( diff --git a/relay-ua/src/lib.rs b/relay-ua/src/lib.rs index 38556ae49c..db00d53713 100644 --- a/relay-ua/src/lib.rs +++ b/relay-ua/src/lib.rs @@ -34,6 
+34,7 @@ pub fn init_parser() { /// /// Defaults to an empty user agent. pub fn parse_user_agent(user_agent: &str) -> UserAgent { + println!("parse_user_agent {:?}", user_agent); UA_PARSER.parse_user_agent(user_agent) } diff --git a/tests/integration/fixtures/processing.py b/tests/integration/fixtures/processing.py index cc15d6ea00..127c746d1c 100644 --- a/tests/integration/fixtures/processing.py +++ b/tests/integration/fixtures/processing.py @@ -410,7 +410,7 @@ def get_chunked_replay(self): return v def get_not_chunked_replay(self): - message = self.poll() + message = self.poll(timeout=10) assert message is not None assert message.error() is None diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index 2d230a5e45..2b22436376 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -13,7 +13,15 @@ def test_replay_combined_with_processing( replay_id = "515539018c9b4260a6f999572f1661ee" relay = relay_with_processing() mini_sentry.add_basic_project_config( - project_id, extra={"config": {"features": ["organizations:session-replay"]}} + project_id, + extra={ + "config": { + "features": [ + "organizations:session-replay", + "organizations:session-replay-combined-envelope-items", + ] + } + }, ) replay_recordings_consumer = replay_recordings_consumer() @@ -36,13 +44,13 @@ def test_replay_combined_with_processing( # the not-combined message will be produced first replay_recordings_consumer.get_not_chunked_replay() - # combined_replay_message = replay_recordings_consumer.get_not_chunked_replay() + combined_replay_message = replay_recordings_consumer.get_not_chunked_replay() - # assert combined_replay_message["type"] == "replay_recording_not_chunked" - # assert combined_replay_message["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" + assert combined_replay_message["type"] == "replay_recording_not_chunked" + assert 
combined_replay_message["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" - # assert combined_replay_message["payload"] == replay_recording_bytes + assert combined_replay_message["payload"] == replay_recording_bytes - # replay_event = json.loads(combined_replay_message["replay_event"]) + replay_event = json.loads(combined_replay_message["replay_event"]) - # assert replay_event["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" + assert replay_event["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" From 43dd578133d0ac9f713eb93b5f51b1215fefdef4 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 8 Feb 2024 16:03:54 -0800 Subject: [PATCH 22/72] dont produce two messages --- relay-server/src/services/store.rs | 32 +++++++++---------- tests/integration/fixtures/processing.py | 4 +-- .../test_replay_combined_payload.py | 18 +++++++---- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index db3a5b6d10..b662219be2 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -247,25 +247,26 @@ impl StoreService { item, )?, ItemType::ReplayRecording => { - println!("0"); if item.replay_combined_payload() { - println!("1"); replay_recording = Some(item); + } else { + self.produce_replay_recording( + event_id, scoping, item, start_time, retention, + )?; } - - self.produce_replay_recording(event_id, scoping, item, start_time, retention)?; } ItemType::ReplayEvent => { - self.produce_replay_event( - event_id.ok_or(StoreError::NoEventId)?, - scoping.organization_id, - scoping.project_id, - start_time, - retention, - item, - )?; if item.replay_combined_payload() { replay_event = Some(item); + } else { + self.produce_replay_event( + event_id.ok_or(StoreError::NoEventId)?, + scoping.organization_id, + scoping.project_id, + start_time, + retention, + item, + )?; } } ItemType::CheckIn => self.produce_check_in( @@ -282,9 +283,6 @@ impl StoreService { _ => {} } } - 
println!("here?"); - println!("event: {:?}", replay_event); - println!("rec: {:?}", replay_recording); if let (Some(replay_event), Some(replay_recording)) = (replay_event, replay_recording) { let combined_replay_kafka_message = Self::extract_combined_replay_kafka_message( @@ -444,7 +442,7 @@ impl StoreService { start_time: Instant, retention: u16, ) -> ReplayRecordingNotChunkedKafkaMessage { - return ReplayRecordingNotChunkedKafkaMessage { + ReplayRecordingNotChunkedKafkaMessage { replay_id: event_id, project_id: scoping.project_id, org_id: scoping.organization_id, @@ -453,7 +451,7 @@ impl StoreService { received: UnixTimestamp::from_instant(start_time).as_secs(), payload: replay_recording.payload(), replay_event: Some(replay_event.payload()), - }; + } } fn produce( diff --git a/tests/integration/fixtures/processing.py b/tests/integration/fixtures/processing.py index 127c746d1c..1d62b5dbc9 100644 --- a/tests/integration/fixtures/processing.py +++ b/tests/integration/fixtures/processing.py @@ -409,8 +409,8 @@ def get_chunked_replay(self): assert v["type"] == "replay_recording", v["type"] return v - def get_not_chunked_replay(self): - message = self.poll(timeout=10) + def get_not_chunked_replay(self, timeout=None): + message = self.poll(timeout=timeout) assert message is not None assert message.error() is None diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index 2b22436376..ba6f7391c4 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -1,3 +1,4 @@ +import zlib from sentry_sdk.envelope import Envelope, Item, PayloadRef from .test_replay_recordings import recording_payload @@ -42,15 +43,20 @@ def test_replay_combined_with_processing( relay.send_envelope(project_id, envelope) - # the not-combined message will be produced first - replay_recordings_consumer.get_not_chunked_replay() - combined_replay_message = 
replay_recordings_consumer.get_not_chunked_replay() + combined_replay_message = replay_recordings_consumer.get_not_chunked_replay( + timeout=10 + ) assert combined_replay_message["type"] == "replay_recording_not_chunked" - assert combined_replay_message["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" + assert combined_replay_message["replay_id"] == replay_id - assert combined_replay_message["payload"] == replay_recording_bytes + assert combined_replay_message["payload"] == payload replay_event = json.loads(combined_replay_message["replay_event"]) - assert replay_event["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" + assert replay_event["replay_id"] == replay_id + + +def recording_payload(bits: bytes): + compressed_payload = zlib.compress(bits) + return b'{"segment_id": 0}\n' + compressed_payload From b9f71567e15a6352dadf952b11895da56c543011 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 8 Feb 2024 16:14:11 -0800 Subject: [PATCH 23/72] cleanup --- relay-dynamic-config/src/feature.rs | 4 ++-- relay-event-normalization/src/normalize/user_agent.rs | 3 +++ relay-event-normalization/src/replay.rs | 2 ++ relay-server/src/envelope.rs | 4 ++-- relay-server/src/services/processor.rs | 1 + relay-server/src/services/processor/replay.rs | 1 + 6 files changed, 11 insertions(+), 4 deletions(-) diff --git a/relay-dynamic-config/src/feature.rs b/relay-dynamic-config/src/feature.rs index edfbf839cf..1d80c42d9b 100644 --- a/relay-dynamic-config/src/feature.rs +++ b/relay-dynamic-config/src/feature.rs @@ -11,8 +11,8 @@ pub enum Feature { /// Enables data scrubbing of replay recording payloads. #[serde(rename = "organizations:session-replay-recording-scrubbing")] SessionReplayRecordingScrubbing, - /// Enables combining session replay envelope item (Replay Recordings and Replay Events). - /// into one item. + /// Enables combining session replay envelope items (Replay Recordings and Replay Events). + /// into one Kafka message. 
#[serde(rename = "organizations:session-replay-combined-envelope-items")] SessionReplayCombinedEnvelopeItems, /// Enables new User Feedback ingest. diff --git a/relay-event-normalization/src/normalize/user_agent.rs b/relay-event-normalization/src/normalize/user_agent.rs index d95d77513c..2ffc44c210 100644 --- a/relay-event-normalization/src/normalize/user_agent.rs +++ b/relay-event-normalization/src/normalize/user_agent.rs @@ -68,6 +68,7 @@ pub fn normalize_user_agent_info_generic( Some("javascript") => OsContext::default_key(), _ => "client_os", }; + if !contexts.contains_key(os_context_key) { if let Some(os_context) = OsContext::from_hints_or_ua(user_agent_info) { contexts.insert(os_context_key.to_owned(), Context::Os(Box::new(os_context))); @@ -330,6 +331,7 @@ impl FromUserAgentInfo for DeviceContext { impl FromUserAgentInfo for BrowserContext { fn parse_client_hints(client_hints: &ClientHints<&str>) -> Option { let (browser, version) = browser_from_client_hints(client_hints.sec_ch_ua?)?; + Some(Self { name: Annotated::new(browser), version: Annotated::new(version), @@ -339,6 +341,7 @@ impl FromUserAgentInfo for BrowserContext { fn parse_user_agent(user_agent: &str) -> Option { let browser = relay_ua::parse_user_agent(user_agent); + if !is_known(&browser.family) { return None; } diff --git a/relay-event-normalization/src/replay.rs b/relay-event-normalization/src/replay.rs index 86b40bd81c..8d2d294bc5 100644 --- a/relay-event-normalization/src/replay.rs +++ b/relay-event-normalization/src/replay.rs @@ -131,11 +131,13 @@ fn normalize_user_agent(replay: &mut Replay, default_user_agent: &RawUserAgentIn }; let user_agent_info = RawUserAgentInfo::from_headers(headers); + let user_agent_info = if user_agent_info.is_empty() { default_user_agent } else { &user_agent_info }; + let contexts = replay.contexts.get_or_insert_with(Contexts::new); user_agent::normalize_user_agent_info_generic(contexts, &replay.platform, user_agent_info); } diff --git 
a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 9406973e20..92efc1df45 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -745,12 +745,12 @@ impl Item { self.headers.source_quantities = Some(source_quantities); } - /// Returns the contained source quantities. + /// Returns if the payload's replay items should be combined into one kafka message. pub fn replay_combined_payload(&self) -> bool { self.headers.replay_combined_payload } - /// Sets new source quantities. + /// Sets the replay_combined_payload for this item. pub fn set_replay_combined_payload(&mut self, combined_payload: bool) { self.headers.replay_combined_payload = combined_payload; } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 31c813fa39..ccf1e8ae0e 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1311,6 +1311,7 @@ impl EnvelopeProcessorService { // This will later be forwarded to upstream. ProcessingGroup::ForwardUnknown => (), } + Ok(()) } diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index a392d047ff..d61fd53d98 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -156,5 +156,6 @@ fn process_replay_event( processor::process_value(&mut replay, &mut processor, ProcessingState::root()) .map_err(|e| ReplayError::CouldNotScrub(e.to_string()))?; } + Ok(replay) } From c8726f38ed85f043d92730ad09ef63524350f614 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 8 Feb 2024 16:16:52 -0800 Subject: [PATCH 24/72] remove println --- relay-ua/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/relay-ua/src/lib.rs b/relay-ua/src/lib.rs index db00d53713..38556ae49c 100644 --- a/relay-ua/src/lib.rs +++ b/relay-ua/src/lib.rs @@ -34,7 +34,6 @@ pub fn init_parser() { /// /// Defaults to an empty user agent. 
pub fn parse_user_agent(user_agent: &str) -> UserAgent { - println!("parse_user_agent {:?}", user_agent); UA_PARSER.parse_user_agent(user_agent) } From 27b508d9e06e02e8a8c5c4f98d48c1bda488a88a Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 8 Feb 2024 16:26:46 -0800 Subject: [PATCH 25/72] fix lint --- tests/integration/test_replay_combined_payload.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index ba6f7391c4..50e1ca3bfc 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -1,4 +1,3 @@ -import zlib from sentry_sdk.envelope import Envelope, Item, PayloadRef from .test_replay_recordings import recording_payload @@ -10,7 +9,6 @@ def test_replay_combined_with_processing( mini_sentry, relay_with_processing, replay_recordings_consumer ): project_id = 42 - org_id = 0 replay_id = "515539018c9b4260a6f999572f1661ee" relay = relay_with_processing() mini_sentry.add_basic_project_config( @@ -55,8 +53,3 @@ def test_replay_combined_with_processing( replay_event = json.loads(combined_replay_message["replay_event"]) assert replay_event["replay_id"] == replay_id - - -def recording_payload(bits: bytes): - compressed_payload = zlib.compress(bits) - return b'{"segment_id": 0}\n' + compressed_payload From f6256686e532bcd66c3156243ea45aa377f00df9 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Thu, 8 Feb 2024 16:49:37 -0800 Subject: [PATCH 26/72] combined_payload not used in non-processing, allow dead code --- relay-server/src/envelope.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 92efc1df45..01ca7effe1 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -746,6 +746,7 @@ impl Item { } /// Returns if the payload's replay items should be combined into one kafka message. 
+ #[cfg_attr(not(feature = "processing"), allow(dead_code))] pub fn replay_combined_payload(&self) -> bool { self.headers.replay_combined_payload } From 8d0e335f112d0916fd73e601aca2568ad7c8b96a Mon Sep 17 00:00:00 2001 From: Josh Ferge Date: Fri, 9 Feb 2024 09:09:38 -0800 Subject: [PATCH 27/72] Update relay-server/src/envelope.rs Co-authored-by: Joris Bayer --- relay-server/src/envelope.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 01ca7effe1..24170c6460 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -489,7 +489,7 @@ pub struct ItemHeaders { #[serde(default, skip)] rate_limited: bool, - /// Indicates that this item should be combined into one payload with othe replay item. + /// Indicates that this item should be combined into one payload with other replay item. /// NOTE: This is internal-only and not exposed into the Envelope. #[serde(default, skip)] replay_combined_payload: bool, From fa4afd2402e11bcc793587835ed5916975e4cf31 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Fri, 9 Feb 2024 09:13:52 -0800 Subject: [PATCH 28/72] simplify processing feature declare on env item flag --- relay-server/src/envelope.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 24170c6460..6c6548fd45 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -746,7 +746,7 @@ impl Item { } /// Returns if the payload's replay items should be combined into one kafka message. 
- #[cfg_attr(not(feature = "processing"), allow(dead_code))] + #[cfg(feature = "processing")] pub fn replay_combined_payload(&self) -> bool { self.headers.replay_combined_payload } From 6383eb024f89ce882280a93bbc642aac5c1b8259 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Fri, 9 Feb 2024 11:42:53 -0800 Subject: [PATCH 29/72] move logic into one func --- relay-server/src/services/store.rs | 151 +++++++++++++------- tests/integration/test_replay_recordings.py | 2 +- 2 files changed, 97 insertions(+), 56 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index b662219be2..6c771785fe 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -197,6 +197,7 @@ impl StoreService { let mut replay_event = None; let mut replay_recording = None; + let mut send_combined_replay_envelope = false; for item in envelope.items() { match item.ty() { @@ -248,26 +249,15 @@ impl StoreService { )?, ItemType::ReplayRecording => { if item.replay_combined_payload() { - replay_recording = Some(item); - } else { - self.produce_replay_recording( - event_id, scoping, item, start_time, retention, - )?; + send_combined_replay_envelope = true } + replay_recording = Some(item); } ItemType::ReplayEvent => { if item.replay_combined_payload() { - replay_event = Some(item); - } else { - self.produce_replay_event( - event_id.ok_or(StoreError::NoEventId)?, - scoping.organization_id, - scoping.project_id, - start_time, - retention, - item, - )?; + send_combined_replay_envelope = true } + replay_event = Some(item); } ItemType::CheckIn => self.produce_check_in( scoping.organization_id, @@ -284,25 +274,35 @@ impl StoreService { } } - if let (Some(replay_event), Some(replay_recording)) = (replay_event, replay_recording) { - let combined_replay_kafka_message = Self::extract_combined_replay_kafka_message( - event_id.ok_or(StoreError::NoEventId)?, - replay_event, - replay_recording, - scoping, - start_time, - retention, - ); - 
self.produce( - KafkaTopic::ReplayRecordings, - scoping.organization_id, - KafkaMessage::ReplayRecordingNotChunked(combined_replay_kafka_message), - )?; - metric!( - counter(RelayCounters::ProcessingMessageProduced) += 1, - event_type = "replay_recording_combined" - ); - } + self.produce_replay_messages( + replay_event, + replay_recording, + event_id.ok_or(StoreError::NoEventId)?, + scoping, + start_time, + retention, + send_combined_replay_envelope, + )?; + + // if let (Some(replay_event), Some(replay_recording)) = (replay_event, replay_recording) { + // let combined_replay_kafka_message = Self::extract_combined_replay_kafka_message( + // event_id.ok_or(StoreError::NoEventId)?, + // replay_event, + // replay_recording, + // scoping, + // start_time, + // retention, + // ); + // self.produce( + // KafkaTopic::ReplayRecordings, + // scoping.organization_id, + // KafkaMessage::ReplayRecordingNotChunked(combined_replay_kafka_message), + // )?; + // metric!( + // counter(RelayCounters::ProcessingMessageProduced) += 1, + // event_type = "replay_recording_combined" + // ); + // } if event_item.is_none() && attachments.is_empty() { // No event-related content. All done. 
@@ -434,26 +434,6 @@ impl StoreService { attachment_iterator.chain(event_iterator) } - fn extract_combined_replay_kafka_message( - event_id: EventId, - replay_event: &Item, - replay_recording: &Item, - scoping: Scoping, - start_time: Instant, - retention: u16, - ) -> ReplayRecordingNotChunkedKafkaMessage { - ReplayRecordingNotChunkedKafkaMessage { - replay_id: event_id, - project_id: scoping.project_id, - org_id: scoping.organization_id, - key_id: scoping.key_id, - retention_days: retention, - received: UnixTimestamp::from_instant(start_time).as_secs(), - payload: replay_recording.payload(), - replay_event: Some(replay_event.payload()), - } - } - fn produce( &self, topic: KafkaTopic, @@ -829,6 +809,7 @@ impl StoreService { event_id: Option, scoping: Scoping, item: &Item, + replay_event: Option<&Item>, start_time: Instant, retention: u16, ) -> Result<(), StoreError> { @@ -838,6 +819,11 @@ impl StoreService { // Remaining bytes can be filled by the payload. let max_payload_size = self.config.max_replay_message_size() - max_message_metadata_size; + let mut replay_event_payload = None; + if let Some(replay_event) = replay_event { + replay_event_payload = Some(replay_event.payload()); + } + if item.payload().len() < max_payload_size { let message = KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { @@ -848,7 +834,7 @@ impl StoreService { received: UnixTimestamp::from_instant(start_time).as_secs(), retention_days: retention, payload: item.payload(), - replay_event: None, + replay_event: replay_event_payload, }); self.produce( @@ -868,6 +854,61 @@ impl StoreService { Ok(()) } + #[allow(clippy::too_many_arguments)] + fn produce_replay_messages( + &self, + replay_event: Option<&Item>, + replay_recording: Option<&Item>, + replay_id: EventId, + scoping: Scoping, + start_time: Instant, + retention_days: u16, + send_combined_replay_envelope: bool, + ) -> Result<(), StoreError> { + if let Some(replay_event) = replay_event { + 
self.produce_replay_event( + replay_id, + scoping.organization_id, + scoping.project_id, + start_time, + retention_days, + replay_event, + )?; + + if let Some(replay_recording) = replay_recording { + if send_combined_replay_envelope { + self.produce_replay_recording( + Some(replay_id), + scoping, + replay_recording, + Some(replay_event), + start_time, + retention_days, + )?; + } else { + self.produce_replay_recording( + Some(replay_id), + scoping, + replay_recording, + None, + start_time, + retention_days, + )?; + } + } + } else if let Some(replay_recording) = replay_recording { + self.produce_replay_recording( + Some(replay_id), + scoping, + replay_recording, + None, + start_time, + retention_days, + )?; + } + Ok(()) + } + fn produce_check_in( &self, organization_id: u64, diff --git a/tests/integration/test_replay_recordings.py b/tests/integration/test_replay_recordings.py index 86b12fe207..a4ac8877c5 100644 --- a/tests/integration/test_replay_recordings.py +++ b/tests/integration/test_replay_recordings.py @@ -125,7 +125,7 @@ def test_nonchunked_replay_recordings_processing( relay.send_envelope(project_id, envelope) # Get the non-chunked replay-recording message from the kafka queue. 
- replay_recording = replay_recordings_consumer.get_not_chunked_replay() + replay_recording = replay_recordings_consumer.get_not_chunked_replay(timeout=10) assert replay_recording["replay_id"] == replay_id assert replay_recording["project_id"] == project_id assert replay_recording["key_id"] == 123 From 9224202c0cf2aa80b182e397eb0f43a04d0c68e9 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Fri, 9 Feb 2024 11:50:55 -0800 Subject: [PATCH 30/72] clean refactor --- relay-server/src/services/store.rs | 42 +++++++++--------------------- 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 6c771785fe..9e4b313c53 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -284,26 +284,6 @@ impl StoreService { send_combined_replay_envelope, )?; - // if let (Some(replay_event), Some(replay_recording)) = (replay_event, replay_recording) { - // let combined_replay_kafka_message = Self::extract_combined_replay_kafka_message( - // event_id.ok_or(StoreError::NoEventId)?, - // replay_event, - // replay_recording, - // scoping, - // start_time, - // retention, - // ); - // self.produce( - // KafkaTopic::ReplayRecordings, - // scoping.organization_id, - // KafkaMessage::ReplayRecordingNotChunked(combined_replay_kafka_message), - // )?; - // metric!( - // counter(RelayCounters::ProcessingMessageProduced) += 1, - // event_type = "replay_recording_combined" - // ); - // } - if event_item.is_none() && attachments.is_empty() { // No event-related content. All done. 
return Ok(()); @@ -866,6 +846,7 @@ impl StoreService { send_combined_replay_envelope: bool, ) -> Result<(), StoreError> { if let Some(replay_event) = replay_event { + // always produce replay event self.produce_replay_event( replay_id, scoping.organization_id, @@ -876,27 +857,28 @@ impl StoreService { )?; if let Some(replay_recording) = replay_recording { + /* + produce replay recording with replay event if combined flag is set + otherwise produce replay recording without replay event + */ if send_combined_replay_envelope { self.produce_replay_recording( Some(replay_id), scoping, replay_recording, - Some(replay_event), - start_time, - retention_days, - )?; - } else { - self.produce_replay_recording( - Some(replay_id), - scoping, - replay_recording, - None, + if send_combined_replay_envelope { + Some(replay_event) + } else { + None + }, start_time, retention_days, )?; } } } else if let Some(replay_recording) = replay_recording { + // this block in theory should never happen, as SDK always sends replay_event and recording together, + // but just in case, if we only receive a recording without an event, we'll still produce it. 
self.produce_replay_recording( Some(replay_id), scoping, From f56ab35963eb9ed086451f6738fe72463a46eccb Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Fri, 9 Feb 2024 11:59:56 -0800 Subject: [PATCH 31/72] add one final test / fix impl --- relay-server/src/services/store.rs | 26 ++++----- .../test_replay_combined_payload.py | 58 ++++++++++++++++++- 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 9e4b313c53..01612c0108 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -861,20 +861,18 @@ impl StoreService { produce replay recording with replay event if combined flag is set otherwise produce replay recording without replay event */ - if send_combined_replay_envelope { - self.produce_replay_recording( - Some(replay_id), - scoping, - replay_recording, - if send_combined_replay_envelope { - Some(replay_event) - } else { - None - }, - start_time, - retention_days, - )?; - } + self.produce_replay_recording( + Some(replay_id), + scoping, + replay_recording, + if send_combined_replay_envelope { + Some(replay_event) + } else { + None + }, + start_time, + retention_days, + )?; } } else if let Some(replay_recording) = replay_recording { // this block in theory should never happen, as SDK always sends replay_event and recording together, diff --git a/tests/integration/test_replay_combined_payload.py b/tests/integration/test_replay_combined_payload.py index 50e1ca3bfc..ad3dd8b4a7 100644 --- a/tests/integration/test_replay_combined_payload.py +++ b/tests/integration/test_replay_combined_payload.py @@ -6,7 +6,10 @@ def test_replay_combined_with_processing( - mini_sentry, relay_with_processing, replay_recordings_consumer + mini_sentry, + relay_with_processing, + replay_recordings_consumer, + replay_events_consumer, ): project_id = 42 replay_id = "515539018c9b4260a6f999572f1661ee" @@ -23,6 +26,7 @@ def test_replay_combined_with_processing( }, ) 
replay_recordings_consumer = replay_recordings_consumer() + replay_events_consumer = replay_events_consumer(timeout=10) envelope = Envelope( headers=[ @@ -53,3 +57,55 @@ def test_replay_combined_with_processing( replay_event = json.loads(combined_replay_message["replay_event"]) assert replay_event["replay_id"] == replay_id + + replay_event, replay_event_message = replay_events_consumer.get_replay_event() + assert replay_event["type"] == "replay_event" + assert replay_event["replay_id"] == replay_id + assert replay_event_message["retention_days"] == 90 + + +def test_replay_combined_with_processing_no_flag_set( + mini_sentry, relay_with_processing, replay_recordings_consumer +): + project_id = 42 + replay_id = "515539018c9b4260a6f999572f1661ee" + relay = relay_with_processing() + mini_sentry.add_basic_project_config( + project_id, + extra={ + "config": { + "features": [ + "organizations:session-replay", + ] + } + }, + ) + replay_recordings_consumer = replay_recordings_consumer() + + envelope = Envelope( + headers=[ + [ + "event_id", + replay_id, + ], + ["attachment_type", "replay_recording"], + ] + ) + payload = recording_payload(b"[]") + envelope.add_item(Item(payload=PayloadRef(bytes=payload), type="replay_recording")) + + replay_event = generate_replay_sdk_event(replay_id=replay_id) + envelope.add_item(Item(payload=PayloadRef(json=replay_event), type="replay_event")) + + relay.send_envelope(project_id, envelope) + + replay_recording_message = replay_recordings_consumer.get_not_chunked_replay( + timeout=10 + ) + + assert replay_recording_message["type"] == "replay_recording_not_chunked" + assert replay_recording_message["replay_id"] == replay_id + + assert replay_recording_message["payload"] == payload + + assert replay_recording_message["replay_event"] is None From 3b52610d661c1fd5aaf099188f6b602ea420d3cf Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Mon, 12 Feb 2024 19:39:43 -0800 Subject: [PATCH 32/72] small refactor to produce function --- 
relay-server/src/services/store.rs | 34 +++++++++--------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 01612c0108..ada3b99f1b 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -846,7 +846,6 @@ impl StoreService { send_combined_replay_envelope: bool, ) -> Result<(), StoreError> { if let Some(replay_event) = replay_event { - // always produce replay event self.produce_replay_event( replay_id, scoping.organization_id, @@ -855,40 +854,27 @@ impl StoreService { retention_days, replay_event, )?; + } + + if let Some(replay_recording) = replay_recording { + let combined_replay_event = if send_combined_replay_envelope && replay_event.is_some() { + replay_event + } else { + None + }; - if let Some(replay_recording) = replay_recording { - /* - produce replay recording with replay event if combined flag is set - otherwise produce replay recording without replay event - */ - self.produce_replay_recording( - Some(replay_id), - scoping, - replay_recording, - if send_combined_replay_envelope { - Some(replay_event) - } else { - None - }, - start_time, - retention_days, - )?; - } - } else if let Some(replay_recording) = replay_recording { - // this block in theory should never happen, as SDK always sends replay_event and recording together, - // but just in case, if we only receive a recording without an event, we'll still produce it. 
self.produce_replay_recording( Some(replay_id), scoping, replay_recording, - None, + combined_replay_event, start_time, retention_days, )?; } + Ok(()) } - fn produce_check_in( &self, organization_id: u64, From 3a71696b498052e6d04dea7dd2eda8c1c172180c Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 09:48:42 -0600 Subject: [PATCH 33/72] Add ReplayVideo envelope item type --- relay-server/src/envelope.rs | 26 +++++++++++++++++++- relay-server/src/services/processor/event.rs | 1 + relay-server/src/utils/rate_limits.rs | 1 + relay-server/src/utils/sizes.rs | 1 + 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 93f1badcdb..b84b102014 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -113,6 +113,8 @@ pub enum ItemType { ReplayEvent, /// Replay Recording data. ReplayRecording, + /// Replay Video data. + ReplayVideo, /// Monitor check-in encoded as JSON. CheckIn, /// A standalone span. 
@@ -167,6 +169,7 @@ impl ItemType { Self::Profile => "profile", Self::ReplayEvent => "replay_event", Self::ReplayRecording => "replay_recording", + Self::ReplayVideo => "replay_video", Self::CheckIn => "check_in", Self::Span => "span", Self::OtelSpan => "otel_span", @@ -213,6 +216,7 @@ impl std::str::FromStr for ItemType { "profile" => Self::Profile, "replay_event" => Self::ReplayEvent, "replay_recording" => Self::ReplayRecording, + "replay_video" => Self::ReplayVideo, "check_in" => Self::CheckIn, "span" => Self::Span, "otel_span" => Self::OtelSpan, @@ -652,7 +656,9 @@ impl Item { } else { DataCategory::Profile }), - ItemType::ReplayEvent | ItemType::ReplayRecording => Some(DataCategory::Replay), + ItemType::ReplayEvent | ItemType::ReplayRecording | ItemType::ReplayVideo => { + Some(DataCategory::Replay) + } ItemType::ClientReport => None, ItemType::CheckIn => Some(DataCategory::Monitor), ItemType::Span | ItemType::OtelSpan => Some(if indexed { @@ -864,6 +870,7 @@ impl Item { | ItemType::ClientReport | ItemType::ReplayEvent | ItemType::ReplayRecording + | ItemType::ReplayVideo | ItemType::Profile | ItemType::CheckIn | ItemType::Span @@ -898,6 +905,7 @@ impl Item { ItemType::MetricMeta => false, ItemType::ClientReport => false, ItemType::ReplayRecording => false, + ItemType::ReplayVideo => false, ItemType::Profile => true, ItemType::CheckIn => false, ItemType::Span => false, @@ -1733,6 +1741,22 @@ mod tests { assert_eq!(items[0].ty(), &ItemType::ReplayRecording); } + #[test] + fn test_deserialize_envelope_replay_video() { + let bytes = Bytes::from( + "\ + {\"event_id\":\"9ec79c33ec9942ab8353589fcb2e04dc\",\"dsn\":\"https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42\"}\n\ + {\"type\":\"replay_video\"}\n\ + helloworld\n\ + ", + ); + + let envelope = Envelope::parse_bytes(bytes).unwrap(); + assert_eq!(envelope.len(), 1); + let items: Vec<_> = envelope.items().collect(); + assert_eq!(items[0].ty(), &ItemType::ReplayVideo); + } + #[test] fn 
test_deserialize_envelope_view_hierarchy() { let bytes = Bytes::from( diff --git a/relay-server/src/services/processor/event.rs b/relay-server/src/services/processor/event.rs index 2dba233c4d..e5621358fb 100644 --- a/relay-server/src/services/processor/event.rs +++ b/relay-server/src/services/processor/event.rs @@ -458,6 +458,7 @@ fn is_duplicate(item: &Item, processing_enabled: bool) -> bool { ItemType::Profile => false, ItemType::ReplayEvent => false, ItemType::ReplayRecording => false, + ItemType::ReplayVideo => false, ItemType::CheckIn => false, ItemType::Span => false, ItemType::OtelSpan => false, diff --git a/relay-server/src/utils/rate_limits.rs b/relay-server/src/utils/rate_limits.rs index 3e8f7b3fd6..f6f6d8e59c 100644 --- a/relay-server/src/utils/rate_limits.rs +++ b/relay-server/src/utils/rate_limits.rs @@ -109,6 +109,7 @@ fn infer_event_category(item: &Item) -> Option { ItemType::Profile => None, ItemType::ReplayEvent => None, ItemType::ReplayRecording => None, + ItemType::ReplayVideo => None, ItemType::ClientReport => None, ItemType::CheckIn => None, ItemType::Span => None, diff --git a/relay-server/src/utils/sizes.rs b/relay-server/src/utils/sizes.rs index cf02d1c584..369c058b2b 100644 --- a/relay-server/src/utils/sizes.rs +++ b/relay-server/src/utils/sizes.rs @@ -48,6 +48,7 @@ pub fn check_envelope_size_limits(config: &Config, envelope: &Envelope) -> Resul config.max_attachment_size() } ItemType::ReplayRecording => config.max_replay_compressed_size(), + ItemType::ReplayVideo => config.max_replay_compressed_size(), ItemType::Session | ItemType::Sessions => { session_count += 1; NO_LIMIT From bdb0345c9720ae1a4a3bf70d59df7cf74aba3d36 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 15:14:23 -0600 Subject: [PATCH 34/72] Add video envelope item validator --- Cargo.lock | 1 + relay-replays/Cargo.toml | 1 + relay-replays/src/lib.rs | 1 + relay-replays/src/video.rs | 68 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) 
create mode 100644 relay-replays/src/video.rs diff --git a/Cargo.lock b/Cargo.lock index ac2e0840a3..5a507863a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4028,6 +4028,7 @@ name = "relay-replays" version = "24.1.2" dependencies = [ "assert-json-diff", + "bytes", "criterion", "flate2", "insta", diff --git a/relay-replays/Cargo.toml b/relay-replays/Cargo.toml index 17edeb7a78..9f44e62162 100644 --- a/relay-replays/Cargo.toml +++ b/relay-replays/Cargo.toml @@ -10,6 +10,7 @@ license-file = "../LICENSE.md" publish = false [dependencies] +bytes = { version = "1.4.0" } flate2 = "1.0.19" once_cell = { workspace = true } relay-common = { path = "../relay-common" } diff --git a/relay-replays/src/lib.rs b/relay-replays/src/lib.rs index b9c718385e..c7f3555081 100644 --- a/relay-replays/src/lib.rs +++ b/relay-replays/src/lib.rs @@ -15,3 +15,4 @@ pub mod recording; mod transform; +pub mod video; diff --git a/relay-replays/src/video.rs b/relay-replays/src/video.rs new file mode 100644 index 0000000000..d2136ab35a --- /dev/null +++ b/relay-replays/src/video.rs @@ -0,0 +1,68 @@ +use bytes::Bytes; +use serde::Deserialize; +use serde_json; +use std::fmt; + +pub fn validate_replay_video(payload: &Bytes) -> Result<(), VideoError> { + // Validate data was provided. + if payload.is_empty() { + return Err(VideoError::Message("no video message found")); + } + + // Validate we were able to find leading headers. + let mut split = payload.splitn(2, |b| b == &b'\n'); + let header = split + .next() + .ok_or(VideoError::Message("no video headers found"))?; + + // Validate the body contains data. + match split.next() { + Some(b"") | None => return Err(VideoError::Message("no video payload found")), + _ => {} + }; + + // Validate the headers are in the appropriate format. 
+ serde_json::from_slice::(header)?; + + Ok(()) +} + +#[allow(dead_code)] +#[derive(Debug, Deserialize)] +struct VideoHeaders { + segment_id: u16, +} + +#[derive(Debug)] +pub enum VideoError { + /// An error parsing the JSON payload. + Parse(serde_json::Error), + /// Validation of the payload failed. + /// + /// The body is empty, is missing the headers, or the body. + Message(&'static str), +} + +impl fmt::Display for VideoError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + VideoError::Parse(serde_error) => write!(f, "{serde_error}"), + VideoError::Message(message) => write!(f, "{message}"), + } + } +} + +impl std::error::Error for VideoError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + VideoError::Parse(e) => Some(e), + VideoError::Message(_) => None, + } + } +} + +impl From for VideoError { + fn from(err: serde_json::Error) -> Self { + VideoError::Parse(err) + } +} From e7bc952568721c10fd413edb404504eb72e5c4de Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 15:15:37 -0600 Subject: [PATCH 35/72] Add ReplayVideo item type processor --- relay-server/src/services/processor/replay.rs | 136 ++++++++++-------- 1 file changed, 79 insertions(+), 57 deletions(-) diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 5816751c3d..0b96d08814 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -13,6 +13,7 @@ use relay_event_schema::protocol::Replay; use relay_pii::PiiProcessor; use relay_protocol::Annotated; use relay_replays::recording::RecordingScrubber; +use relay_replays::video::validate_replay_video; use relay_statsd::metric; use crate::envelope::{ContentType, ItemType}; @@ -54,76 +55,97 @@ pub fn process( let combined_envelope_items = project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); - state.managed_envelope.retain_items(|item| match 
item.ty() { - ItemType::ReplayEvent => { - if !replays_enabled { - return ItemAction::DropSilently; - } - if combined_envelope_items { - item.set_replay_combined_payload(true); - } + // If any envelope item is dropped the whole request should be abandoned. + let mut item_dropped = false; - match process_replay_event(&item.payload(), project_config, client_addr, user_agent) { - Ok(replay) => match replay.to_json() { - Ok(json) => { - item.set_payload(ContentType::Json, json); - ItemAction::Keep - } + state.managed_envelope.retain_items(|item| { + if !replays_enabled || item_dropped { + return ItemAction::DropSilently; + } + + match item.ty() { + ItemType::ReplayEvent => { + if combined_envelope_items { + item.set_replay_combined_payload(true); + } + + match process_replay_event(&item.payload(), project_config, client_addr, user_agent) + { + Ok(replay) => match replay.to_json() { + Ok(json) => { + item.set_payload(ContentType::Json, json); + ItemAction::Keep + } + Err(error) => { + relay_log::error!( + error = &error as &dyn Error, + "failed to serialize replay" + ); + ItemAction::Keep + } + }, Err(error) => { - relay_log::error!( - error = &error as &dyn Error, - "failed to serialize replay" - ); - ItemAction::Keep + item_dropped = true; + relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); + ItemAction::Drop(Outcome::Invalid(match error { + ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, + ReplayError::CouldNotScrub(_) => DiscardReason::InvalidReplayEventPii, + ReplayError::CouldNotParse(_) => DiscardReason::InvalidReplayEvent, + ReplayError::InvalidPayload(_) => DiscardReason::InvalidReplayEvent, + })) } - }, - Err(error) => { - relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); - ItemAction::Drop(Outcome::Invalid(match error { - ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, - ReplayError::CouldNotScrub(_) => DiscardReason::InvalidReplayEventPii, - ReplayError::CouldNotParse(_) 
=> DiscardReason::InvalidReplayEvent, - ReplayError::InvalidPayload(_) => DiscardReason::InvalidReplayEvent, - })) } } - } - ItemType::ReplayRecording => { - if !replays_enabled { - return ItemAction::DropSilently; - } - if combined_envelope_items { - item.set_replay_combined_payload(true); - } + ItemType::ReplayRecording => { + if combined_envelope_items { + item.set_replay_combined_payload(true); + } - // XXX: Processing is there just for data scrubbing. Skip the entire expensive - // processing step if we do not need to scrub. - if !scrubbing_enabled || scrubber.is_empty() { - return ItemAction::Keep; - } + // XXX: Processing is there just for data scrubbing. Skip the entire expensive + // processing step if we do not need to scrub. + if !scrubbing_enabled || scrubber.is_empty() { + return ItemAction::Keep; + } - // Limit expansion of recordings to the max replay size. The payload is - // decompressed temporarily and then immediately re-compressed. However, to - // limit memory pressure, we use the replay limit as a good overall limit for - // allocations. - let parsed_recording = metric!(timer(RelayTimers::ReplayRecordingProcessing), { - scrubber.process_recording(&item.payload()) - }); - - match parsed_recording { - Ok(recording) => { - item.set_payload(ContentType::OctetStream, recording); - ItemAction::Keep + // Limit expansion of recordings to the max replay size. The payload is + // decompressed temporarily and then immediately re-compressed. However, to + // limit memory pressure, we use the replay limit as a good overall limit for + // allocations. 
+ let parsed_recording = metric!(timer(RelayTimers::ReplayRecordingProcessing), { + scrubber.process_recording(&item.payload()) + }); + + match parsed_recording { + Ok(recording) => { + item.set_payload(ContentType::OctetStream, recording); + ItemAction::Keep + } + Err(e) => { + item_dropped = true; + relay_log::warn!("replay-recording-event: {e} {event_id:?}"); + ItemAction::Drop(Outcome::Invalid( + DiscardReason::InvalidReplayRecordingEvent, + )) + } } + } + ItemType::ReplayVideo => match validate_replay_video(&item.payload()) { + Ok(()) => ItemAction::Keep, Err(e) => { - relay_log::warn!("replay-recording-event: {e} {event_id:?}"); - ItemAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayRecordingEvent)) + item_dropped = true; + relay_log::warn!("could not parse video headers: {e} {event_id:?}"); + ItemAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayVideoEvent)) } - } + }, + _ => ItemAction::Keep, } - _ => ItemAction::Keep, }); + // If an envelope-item was dropped return an error result to drop the entire envelope. 
+ if item_dropped { + return Err(ProcessingError::PartiallyDroppedReplayEnvelope); + } + Ok(()) } From 0331a1fb43e180d69796c852a2d5f7f25dc5bdca Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 15:16:54 -0600 Subject: [PATCH 36/72] Send optional replay_video field in Kafka payload --- relay-server/src/services/store.rs | 77 +++++++++++++++++------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index ada3b99f1b..614b22952c 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -197,6 +197,7 @@ impl StoreService { let mut replay_event = None; let mut replay_recording = None; + let mut replay_video = None; let mut send_combined_replay_envelope = false; for item in envelope.items() { @@ -247,6 +248,7 @@ impl StoreService { start_time, item, )?, + ItemType::ReplayVideo => replay_video = Some(item), ItemType::ReplayRecording => { if item.replay_combined_payload() { send_combined_replay_envelope = true @@ -277,6 +279,7 @@ impl StoreService { self.produce_replay_messages( replay_event, replay_recording, + replay_video, event_id.ok_or(StoreError::NoEventId)?, scoping, start_time, @@ -784,52 +787,58 @@ impl StoreService { Ok(()) } + #[allow(clippy::too_many_arguments)] fn produce_replay_recording( &self, event_id: Option, scoping: Scoping, item: &Item, replay_event: Option<&Item>, + replay_video: Option<&Item>, start_time: Instant, retention: u16, ) -> Result<(), StoreError> { - // 2000 bytes are reserved for the message metadata. - let max_message_metadata_size = 2000; + // Map the event and video items to their byte messages. + let replay_event_payload = replay_event.map(|rv| rv.payload()); + let replay_video_payload = replay_video.map(|rv| rv.payload()); // Remaining bytes can be filled by the payload. 
- let max_payload_size = self.config.max_replay_message_size() - max_message_metadata_size; - - let mut replay_event_payload = None; - if let Some(replay_event) = replay_event { - replay_event_payload = Some(replay_event.payload()); + let mut max_payload_size = self.config.max_replay_message_size(); + max_payload_size -= replay_event_payload.as_ref().map_or(0, |b| b.len()); + max_payload_size -= replay_video_payload.as_ref().map_or(0, |b| b.len()); + max_payload_size -= 2000; // Reserve 2KB for the message metadata. + + // If the recording payload can not fit in to the message do not produce and quit early. + // + // TODO: Should we emit an outcome here? + if item.payload().len() >= max_payload_size { + relay_log::warn!("replay_recording over maximum size."); + return Ok(()); } - if item.payload().len() < max_payload_size { - let message = - KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { - replay_id: event_id.ok_or(StoreError::NoEventId)?, - project_id: scoping.project_id, - key_id: scoping.key_id, - org_id: scoping.organization_id, - received: UnixTimestamp::from_instant(start_time).as_secs(), - retention_days: retention, - payload: item.payload(), - replay_event: replay_event_payload, - }); + let message = + KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { + replay_id: event_id.ok_or(StoreError::NoEventId)?, + project_id: scoping.project_id, + key_id: scoping.key_id, + org_id: scoping.organization_id, + received: UnixTimestamp::from_instant(start_time).as_secs(), + retention_days: retention, + payload: item.payload(), + replay_event: replay_event_payload, + replay_video: replay_video_payload, + }); - self.produce( - KafkaTopic::ReplayRecordings, - scoping.organization_id, - message, - )?; + self.produce( + KafkaTopic::ReplayRecordings, + scoping.organization_id, + message, + )?; - metric!( - counter(RelayCounters::ProcessingMessageProduced) += 1, - event_type = "replay_recording_not_chunked" - ); - } 
else { - relay_log::warn!("replay_recording over maximum size."); - }; + metric!( + counter(RelayCounters::ProcessingMessageProduced) += 1, + event_type = "replay_recording_not_chunked" + ); Ok(()) } @@ -839,6 +848,7 @@ impl StoreService { &self, replay_event: Option<&Item>, replay_recording: Option<&Item>, + replay_video: Option<&Item>, replay_id: EventId, scoping: Scoping, start_time: Instant, @@ -857,7 +867,8 @@ impl StoreService { } if let Some(replay_recording) = replay_recording { - let combined_replay_event = if send_combined_replay_envelope && replay_event.is_some() { + // We only combine if the feature-flag was enabled. + let combined_replay_event = if send_combined_replay_envelope { replay_event } else { None @@ -868,6 +879,7 @@ impl StoreService { scoping, replay_recording, combined_replay_event, + replay_video, start_time, retention_days, )?; @@ -1177,6 +1189,7 @@ struct ReplayRecordingNotChunkedKafkaMessage { retention_days: u16, payload: Bytes, replay_event: Option, + replay_video: Option, } /// User report for an event wrapped up in a message ready for consumption in Kafka. 
From 8208ea4794884ac7a3e72c1b479eded47da48dbc Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 15:17:21 -0600 Subject: [PATCH 37/72] Register outcome, rate-limit, and processing error --- relay-server/src/services/outcome.rs | 2 ++ relay-server/src/services/processor.rs | 8 ++++++++ relay-server/src/utils/rate_limits.rs | 1 + 3 files changed, 11 insertions(+) diff --git a/relay-server/src/services/outcome.rs b/relay-server/src/services/outcome.rs index ab63931da2..0e23660cc8 100644 --- a/relay-server/src/services/outcome.rs +++ b/relay-server/src/services/outcome.rs @@ -366,6 +366,7 @@ pub enum DiscardReason { InvalidReplayEventNoPayload, InvalidReplayEventPii, InvalidReplayRecordingEvent, + InvalidReplayVideoEvent, /// (Relay) Profiling related discard reasons Profiling(&'static str), @@ -413,6 +414,7 @@ impl DiscardReason { DiscardReason::InvalidReplayEventNoPayload => "invalid_replay_no_payload", DiscardReason::InvalidReplayEventPii => "invalid_replay_pii_scrubber_failed", DiscardReason::InvalidReplayRecordingEvent => "invalid_replay_recording", + DiscardReason::InvalidReplayVideoEvent => "invalid_replay_video", DiscardReason::Profiling(reason) => reason, DiscardReason::InvalidSpan => "invalid_span", } diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 626ebd457f..015094a805 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -345,6 +345,11 @@ pub enum ProcessingError { #[error("invalid pii config")] PiiConfigError(PiiConfigError), + + // A replay is made up of a maximum of three envelope items. If one of the items + // is dropped then the rest should be dropped with it. + #[error("one or more parts of the replay were dropped")] + PartiallyDroppedReplayEnvelope, } impl ProcessingError { @@ -385,6 +390,9 @@ impl ProcessingError { // These outcomes are emitted at the source. 
Self::MissingProjectId => None, Self::EventFiltered(_) => None, + + // Replay + Self::PartiallyDroppedReplayEnvelope => None, } } diff --git a/relay-server/src/utils/rate_limits.rs b/relay-server/src/utils/rate_limits.rs index f6f6d8e59c..c74314b1d3 100644 --- a/relay-server/src/utils/rate_limits.rs +++ b/relay-server/src/utils/rate_limits.rs @@ -210,6 +210,7 @@ impl EnvelopeSummary { ItemType::Profile => &mut self.profile_quantity, ItemType::ReplayEvent => &mut self.replay_quantity, ItemType::ReplayRecording => &mut self.replay_quantity, + ItemType::ReplayVideo => &mut self.replay_quantity, ItemType::CheckIn => &mut self.checkin_quantity, _ => return, }; From 67c06059035f42409afe97ca97f51e02560bffec Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 15:42:10 -0600 Subject: [PATCH 38/72] Compute payload through addition --- relay-server/src/services/store.rs | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 614b22952c..5c18c885d5 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -798,21 +798,32 @@ impl StoreService { start_time: Instant, retention: u16, ) -> Result<(), StoreError> { - // Map the event and video items to their byte messages. + // Map the event and video items to their byte payload values. let replay_event_payload = replay_event.map(|rv| rv.payload()); let replay_video_payload = replay_video.map(|rv| rv.payload()); - // Remaining bytes can be filled by the payload. - let mut max_payload_size = self.config.max_replay_message_size(); - max_payload_size -= replay_event_payload.as_ref().map_or(0, |b| b.len()); - max_payload_size -= replay_video_payload.as_ref().map_or(0, |b| b.len()); - max_payload_size -= 2000; // Reserve 2KB for the message metadata. + // Maximum number of bytes accepted by the consumer. 
+ let max_payload_size = self.config.max_replay_message_size(); + + // Size of the consumer message. We can be reasonably sure this won't overflow because + // of the request size validation provided by Nginx and Relay. + let mut payload_size = 2000; // Reserve 2KB for the message metadata. + payload_size += replay_event_payload.as_ref().map_or(0, |b| b.len()); + payload_size += replay_video_payload.as_ref().map_or(0, |b| b.len()); + payload_size += item.payload().len(); // If the recording payload can not fit in to the message do not produce and quit early. - // - // TODO: Should we emit an outcome here? - if item.payload().len() >= max_payload_size { + if payload_size >= max_payload_size { relay_log::warn!("replay_recording over maximum size."); + self.outcome_aggregator.send(TrackOutcome { + category: DataCategory::Replay, + event_id, + outcome: Outcome::Invalid(DiscardReason::TooLarge), + quantity: 1, + remote_addr: None, + scoping, + timestamp: instant_to_date_time(start_time), + }); return Ok(()); } From 898da00b8f4375bf61280996fcd63a1996b4694b Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 15:50:34 -0600 Subject: [PATCH 39/72] Add docs --- relay-replays/src/video.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/relay-replays/src/video.rs b/relay-replays/src/video.rs index d2136ab35a..f786d4753e 100644 --- a/relay-replays/src/video.rs +++ b/relay-replays/src/video.rs @@ -1,8 +1,24 @@ +//! ReplayVideo validator. +//! +//! ReplayVideo envelope-item types contain headers which must be present to complete processing. use bytes::Bytes; use serde::Deserialize; use serde_json; use std::fmt; +/// Validates ReplayVideo envelope-item payloads. +/// +/// # Usage +/// +/// Accepts a borrowed reference to `Bytes` as input. +/// +/// # Errors +/// +/// This function will return errors if: +/// - The message is empty. +/// - If the header seperator character could not be found. +/// - If the headers are invalid. 
+/// - If the body value is empty. pub fn validate_replay_video(payload: &Bytes) -> Result<(), VideoError> { // Validate data was provided. if payload.is_empty() { @@ -33,6 +49,7 @@ struct VideoHeaders { segment_id: u16, } +/// Error returned from [`validate_replay_video`]. #[derive(Debug)] pub enum VideoError { /// An error parsing the JSON payload. From bab920219ed1ccb813bdf97eadb295bd2630fa56 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 15:51:26 -0600 Subject: [PATCH 40/72] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c16fe27be1..e0bc2083d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ **Features**: - Add protobuf support for ingesting OpenTelemetry spans and use official `opentelemetry-proto` generated structs. ([#3044](https://github.com/getsentry/relay/pull/3044)) +- Adds ReplayVideo envelope-item type to support mobile replays. ([#3105](https://github.com/getsentry/relay/pull/3105)) **Internal**: From 7baee11fb5c96bc899afdc007024ae237c5bc3eb Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 13 Feb 2024 15:51:46 -0600 Subject: [PATCH 41/72] Re-phrase changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0bc2083d0..f769bbdf38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ **Features**: - Add protobuf support for ingesting OpenTelemetry spans and use official `opentelemetry-proto` generated structs. ([#3044](https://github.com/getsentry/relay/pull/3044)) -- Adds ReplayVideo envelope-item type to support mobile replays. ([#3105](https://github.com/getsentry/relay/pull/3105)) +- Adds ReplayVideo envelope-item type. 
([#3105](https://github.com/getsentry/relay/pull/3105)) **Internal**: From 35dd886d134f1ff502e04d3ead089cc3587dfda2 Mon Sep 17 00:00:00 2001 From: Joshua Ferge Date: Tue, 13 Feb 2024 16:10:54 -0800 Subject: [PATCH 42/72] only produce replay events if replay recording / event exists --- relay-server/src/services/store.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index ada3b99f1b..c5835c1115 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -274,15 +274,17 @@ impl StoreService { } } - self.produce_replay_messages( - replay_event, - replay_recording, - event_id.ok_or(StoreError::NoEventId)?, - scoping, - start_time, - retention, - send_combined_replay_envelope, - )?; + if replay_event.is_some() || replay_recording.is_some() { + self.produce_replay_messages( + replay_event, + replay_recording, + event_id.ok_or(StoreError::NoEventId)?, + scoping, + start_time, + retention, + send_combined_replay_envelope, + )?; + } if event_item.is_none() && attachments.is_empty() { // No event-related content. All done. 
From 12ce17049b0e7e9adb0d7aa5c8991922ad8b4aef Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 14 Feb 2024 22:31:32 -0600 Subject: [PATCH 43/72] Remove redundant boolean condition --- relay-server/src/services/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index c5835c1115..be9b7709aa 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -859,7 +859,7 @@ impl StoreService { } if let Some(replay_recording) = replay_recording { - let combined_replay_event = if send_combined_replay_envelope && replay_event.is_some() { + let combined_replay_event = if send_combined_replay_envelope { replay_event } else { None From 70e20f3e19964544b9c72770ba65cf10470266a2 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 14 Feb 2024 22:34:53 -0600 Subject: [PATCH 44/72] Flatten extraction and consider replay_event size when measuring payload size --- relay-server/src/services/store.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index be9b7709aa..10129c219c 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -795,18 +795,21 @@ impl StoreService { start_time: Instant, retention: u16, ) -> Result<(), StoreError> { - // 2000 bytes are reserved for the message metadata. - let max_message_metadata_size = 2000; + // Map the event item to it's byte payload value. + let replay_event_payload = replay_event.map(|rv| rv.payload()); - // Remaining bytes can be filled by the payload. - let max_payload_size = self.config.max_replay_message_size() - max_message_metadata_size; + // Maximum number of bytes accepted by the consumer. 
+ let max_payload_size = self.config.max_replay_message_size(); - let mut replay_event_payload = None; - if let Some(replay_event) = replay_event { - replay_event_payload = Some(replay_event.payload()); - } + // Size of the consumer message. We can be reasonably sure this won't overflow because + // of the request size validation provided by Nginx and Relay. + // + // NOTE: We could cast everything to `isize`. + let mut payload_size = 2000; // Reserve 2KB for the message metadata. + payload_size += replay_event_payload.as_ref().map_or(0, |b| b.len()); + payload_size += item.payload().len(); - if item.payload().len() < max_payload_size { + if payload_size >= max_payload_size { let message = KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { replay_id: event_id.ok_or(StoreError::NoEventId)?, From 68c5a02fc71e063287dd1bcbe47e3c6ea555988a Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 14 Feb 2024 23:19:12 -0600 Subject: [PATCH 45/72] Replay-event variable is only populated if the flag is enabled --- relay-server/src/services/store.rs | 60 +++++++++++------------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 10129c219c..20039c2baa 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -197,7 +197,6 @@ impl StoreService { let mut replay_event = None; let mut replay_recording = None; - let mut send_combined_replay_envelope = false; for item in envelope.items() { match item.ty() { @@ -248,16 +247,21 @@ impl StoreService { item, )?, ItemType::ReplayRecording => { - if item.replay_combined_payload() { - send_combined_replay_envelope = true - } replay_recording = Some(item); } ItemType::ReplayEvent => { if item.replay_combined_payload() { - send_combined_replay_envelope = true + replay_event = Some(item); } - replay_event = Some(item); + + self.produce_replay_event( + 
event_id.ok_or(StoreError::NoEventId)?, + scoping.organization_id, + scoping.project_id, + start_time, + retention, + item, + )?; } ItemType::CheckIn => self.produce_check_in( scoping.organization_id, @@ -274,15 +278,14 @@ impl StoreService { } } - if replay_event.is_some() || replay_recording.is_some() { + if let Some(recording) = replay_recording { self.produce_replay_messages( replay_event, - replay_recording, + recording, event_id.ok_or(StoreError::NoEventId)?, scoping, start_time, retention, - send_combined_replay_envelope, )?; } @@ -839,44 +842,23 @@ impl StoreService { Ok(()) } - #[allow(clippy::too_many_arguments)] fn produce_replay_messages( &self, replay_event: Option<&Item>, - replay_recording: Option<&Item>, + replay_recording: &Item, replay_id: EventId, scoping: Scoping, start_time: Instant, retention_days: u16, - send_combined_replay_envelope: bool, ) -> Result<(), StoreError> { - if let Some(replay_event) = replay_event { - self.produce_replay_event( - replay_id, - scoping.organization_id, - scoping.project_id, - start_time, - retention_days, - replay_event, - )?; - } - - if let Some(replay_recording) = replay_recording { - let combined_replay_event = if send_combined_replay_envelope { - replay_event - } else { - None - }; - - self.produce_replay_recording( - Some(replay_id), - scoping, - replay_recording, - combined_replay_event, - start_time, - retention_days, - )?; - } + self.produce_replay_recording( + Some(replay_id), + scoping, + replay_recording, + replay_event, + start_time, + retention_days, + )?; Ok(()) } From 306245a9b5869e5b576d6c9a1f0b41d6b8552202 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 14 Feb 2024 23:25:43 -0600 Subject: [PATCH 46/72] Remove redundant method --- relay-server/src/services/store.rs | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 20039c2baa..ecab9ceec2 100644 --- 
a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -279,11 +279,11 @@ impl StoreService { } if let Some(recording) = replay_recording { - self.produce_replay_messages( - replay_event, - recording, - event_id.ok_or(StoreError::NoEventId)?, + self.produce_replay_recording( + event_id, scoping, + recording, + replay_event, start_time, retention, )?; @@ -842,26 +842,6 @@ impl StoreService { Ok(()) } - fn produce_replay_messages( - &self, - replay_event: Option<&Item>, - replay_recording: &Item, - replay_id: EventId, - scoping: Scoping, - start_time: Instant, - retention_days: u16, - ) -> Result<(), StoreError> { - self.produce_replay_recording( - Some(replay_id), - scoping, - replay_recording, - replay_event, - start_time, - retention_days, - )?; - - Ok(()) - } fn produce_check_in( &self, organization_id: u64, From 6ad66f90b3d446d00e0e2ccd462d46134d17ee83 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 14 Feb 2024 23:25:59 -0600 Subject: [PATCH 47/72] Remove isize note --- relay-server/src/services/store.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index ecab9ceec2..caf888acd4 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -806,8 +806,6 @@ impl StoreService { // Size of the consumer message. We can be reasonably sure this won't overflow because // of the request size validation provided by Nginx and Relay. - // - // NOTE: We could cast everything to `isize`. let mut payload_size = 2000; // Reserve 2KB for the message metadata. 
payload_size += replay_event_payload.as_ref().map_or(0, |b| b.len()); payload_size += item.payload().len(); From 2959b9d8488acf92bbcf9c125641df4fa055a53a Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 14 Feb 2024 23:42:46 -0600 Subject: [PATCH 48/72] Correct boolean condition --- relay-server/src/services/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index caf888acd4..1e2e00ee6e 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -810,7 +810,7 @@ impl StoreService { payload_size += replay_event_payload.as_ref().map_or(0, |b| b.len()); payload_size += item.payload().len(); - if payload_size >= max_payload_size { + if payload_size < max_payload_size { let message = KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { replay_id: event_id.ok_or(StoreError::NoEventId)?, From b96ca079e9bc2b68f7befaac9cfc2509d8555de8 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 14 Feb 2024 23:44:09 -0600 Subject: [PATCH 49/72] Emit outcome if the payload is too large --- relay-server/src/services/store.rs | 59 ++++++++++++++++++------------ 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 1e2e00ee6e..b2b000b5ab 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -810,32 +810,43 @@ impl StoreService { payload_size += replay_event_payload.as_ref().map_or(0, |b| b.len()); payload_size += item.payload().len(); - if payload_size < max_payload_size { - let message = - KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { - replay_id: event_id.ok_or(StoreError::NoEventId)?, - project_id: scoping.project_id, - key_id: scoping.key_id, - org_id: scoping.organization_id, - received: UnixTimestamp::from_instant(start_time).as_secs(), - retention_days: retention, 
- payload: item.payload(), - replay_event: replay_event_payload, - }); + // If the recording payload can not fit in to the message do not produce and quit early. + if payload_size >= max_payload_size { + relay_log::warn!("replay_recording over maximum size."); + self.outcome_aggregator.send(TrackOutcome { + category: DataCategory::Replay, + event_id, + outcome: Outcome::Invalid(DiscardReason::TooLarge), + quantity: 1, + remote_addr: None, + scoping, + timestamp: instant_to_date_time(start_time), + }); + return Ok(()); + } - self.produce( - KafkaTopic::ReplayRecordings, - scoping.organization_id, - message, - )?; + let message = + KafkaMessage::ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage { + replay_id: event_id.ok_or(StoreError::NoEventId)?, + project_id: scoping.project_id, + key_id: scoping.key_id, + org_id: scoping.organization_id, + received: UnixTimestamp::from_instant(start_time).as_secs(), + retention_days: retention, + payload: item.payload(), + replay_event: replay_event_payload, + }); - metric!( - counter(RelayCounters::ProcessingMessageProduced) += 1, - event_type = "replay_recording_not_chunked" - ); - } else { - relay_log::warn!("replay_recording over maximum size."); - }; + self.produce( + KafkaTopic::ReplayRecordings, + scoping.organization_id, + message, + )?; + + metric!( + counter(RelayCounters::ProcessingMessageProduced) += 1, + event_type = "replay_recording_not_chunked" + ); Ok(()) } From fb1480f1d3f51cfff3c7d102d0750c866aeab28d Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 15 Feb 2024 07:04:24 -0600 Subject: [PATCH 50/72] Rename error and update comments --- relay-server/src/services/processor.rs | 8 +++----- relay-server/src/services/processor/replay.rs | 9 ++++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 015094a805..f7cf1f339e 100644 --- a/relay-server/src/services/processor.rs +++ 
b/relay-server/src/services/processor.rs @@ -346,10 +346,8 @@ pub enum ProcessingError { #[error("invalid pii config")] PiiConfigError(PiiConfigError), - // A replay is made up of a maximum of three envelope items. If one of the items - // is dropped then the rest should be dropped with it. - #[error("one or more parts of the replay were dropped")] - PartiallyDroppedReplayEnvelope, + #[error("replay envelope was incomplete or partially dropped")] + IncompleteReplayEnvelope, } impl ProcessingError { @@ -392,7 +390,7 @@ impl ProcessingError { Self::EventFiltered(_) => None, // Replay - Self::PartiallyDroppedReplayEnvelope => None, + Self::IncompleteReplayEnvelope => None, } } diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 0b96d08814..1ab3b70d5e 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -55,10 +55,12 @@ pub fn process( let combined_envelope_items = project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); - // If any envelope item is dropped the whole request should be abandoned. + // If any item in the envelope was dropped we record it. let mut item_dropped = false; state.managed_envelope.retain_items(|item| { + // If replays aren't enabled or an item was dropped - drop the remainder of the + // envelope. if !replays_enabled || item_dropped { return ItemAction::DropSilently; } @@ -141,9 +143,10 @@ pub fn process( } }); - // If an envelope-item was dropped return an error result to drop the entire envelope. + // If an item was dropped we emitted one failure outcome for the failing item and skipped + // the rest. We return an error to exit processing early. 
if item_dropped { - return Err(ProcessingError::PartiallyDroppedReplayEnvelope); + return Err(ProcessingError::IncompleteReplayEnvelope); } Ok(()) From 0fa467837f9741683bc97a384e387a998eefc3e5 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 15 Feb 2024 07:44:51 -0600 Subject: [PATCH 51/72] Add replay-video test coverage --- tests/integration/test_replay_recordings.py | 45 +++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/integration/test_replay_recordings.py b/tests/integration/test_replay_recordings.py index a4ac8877c5..ed9fc56d0f 100644 --- a/tests/integration/test_replay_recordings.py +++ b/tests/integration/test_replay_recordings.py @@ -138,6 +138,51 @@ def test_nonchunked_replay_recordings_processing( outcomes_consumer.assert_empty() +def test_replay_recording_with_video( + mini_sentry, relay_with_processing, replay_recordings_consumer, outcomes_consumer +): + project_id = 42 + org_id = 0 + replay_id = "515539018c9b4260a6f999572f1661ee" + relay = relay_with_processing() + mini_sentry.add_basic_project_config( + project_id, extra={"config": {"features": ["organizations:session-replay"]}} + ) + replay_recordings_consumer = replay_recordings_consumer() + outcomes_consumer = outcomes_consumer() + + envelope = Envelope( + headers=[ + [ + "event_id", + replay_id, + ], + ["attachment_type", "replay_recording"], + ] + ) + payload = recording_payload(b"[]") + envelope.add_item(Item(payload=PayloadRef(bytes=payload), type="replay_recording")) + envelope.add_item( + Item(payload=PayloadRef(bytes=b"hello, world!"), type="replay_video") + ) + + relay.send_envelope(project_id, envelope) + + # Get the non-chunked replay-recording message from the kafka queue. 
+ replay_recording = replay_recordings_consumer.get_not_chunked_replay(timeout=10) + assert replay_recording["replay_id"] == replay_id + assert replay_recording["project_id"] == project_id + assert replay_recording["key_id"] == 123 + assert replay_recording["org_id"] == org_id + assert type(replay_recording["received"]) == int + assert replay_recording["retention_days"] == 90 + assert replay_recording["payload"] == payload + assert replay_recording["type"] == "replay_recording_not_chunked" + assert replay_recording["replay_video"] == b"hello, world!" + + outcomes_consumer.assert_empty() + + def recording_payload(bits: bytes): compressed_payload = zlib.compress(bits) return b'{"segment_id": 0}\n' + compressed_payload From 7a3ee9d054f0a9eb877070e873b195a55d4877f8 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 15 Feb 2024 07:45:07 -0600 Subject: [PATCH 52/72] Remove unused test case --- tests/integration/test_replay_recordings.py | 66 --------------------- 1 file changed, 66 deletions(-) diff --git a/tests/integration/test_replay_recordings.py b/tests/integration/test_replay_recordings.py index ed9fc56d0f..7c036e7b99 100644 --- a/tests/integration/test_replay_recordings.py +++ b/tests/integration/test_replay_recordings.py @@ -31,72 +31,6 @@ def test_replay_recordings(mini_sentry, relay_chain): assert replay_recording.startswith(b"{}\n") # The body is compressed -@pytest.mark.skip("sends a broken payload that gets dropped") -def test_chunked_replay_recordings_processing( - mini_sentry, relay_with_processing, replay_recordings_consumer, outcomes_consumer -): - project_id = 42 - org_id = 0 - replay_id = "515539018c9b4260a6f999572f1661ee" - relay = relay_with_processing() - mini_sentry.add_basic_project_config( - project_id, extra={"config": {"features": ["organizations:session-replay"]}} - ) - replay_recordings_consumer = replay_recordings_consumer() - outcomes_consumer = outcomes_consumer() - - # The smallest chunked payload possible. 
- bits = b"1" * (1000 * 1000 - 1999) - - envelope = Envelope( - headers=[ - [ - "event_id", - replay_id, - ], - ["attachment_type", "replay_recording"], - ] - ) - envelope.add_item(Item(payload=PayloadRef(bytes=bits), type="replay_recording")) - - relay.send_envelope(project_id, envelope) - - replay_recording_contents = {} - replay_recording_ids = [] - replay_recording_num_chunks = {} - - for _ in range(2): - chunk, v = replay_recordings_consumer.get_chunked_replay_chunk() - replay_recording_contents[v["id"]] = ( - replay_recording_contents.get(v["id"], b"") + chunk - ) - if v["id"] not in replay_recording_ids: - replay_recording_ids.append(v["id"]) - num_chunks = 1 + replay_recording_num_chunks.get(v["id"], 0) - assert v["chunk_index"] == num_chunks - 1 - replay_recording_num_chunks[v["id"]] = num_chunks - - id1 = replay_recording_ids[0] - - assert replay_recording_contents[id1] == bits - - replay_recording = replay_recordings_consumer.get_chunked_replay() - - assert replay_recording["type"] == "replay_recording" - assert replay_recording["replay_recording"] == { - "chunks": replay_recording_num_chunks[id1], - "id": id1, - "size": len(replay_recording_contents[id1]), - } - assert replay_recording["replay_id"] == replay_id - assert replay_recording["project_id"] == project_id - assert replay_recording["org_id"] == org_id - assert replay_recording["key_id"] == 123 - assert replay_recording["retention_days"] == 90 - assert replay_recording["received"] - assert type(replay_recording["received"]) == int - - def test_nonchunked_replay_recordings_processing( mini_sentry, relay_with_processing, replay_recordings_consumer, outcomes_consumer ): From c492902fee273c0c49977cda16d08901a7c4f03f Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 20 Feb 2024 12:29:36 -0600 Subject: [PATCH 53/72] Add support for SDK merged replay-video envelope item --- relay-replays/src/lib.rs | 1 - relay-replays/src/video.rs | 85 ------ relay-server/src/services/processor.rs | 6 - 
relay-server/src/services/processor/replay.rs | 266 +++++++++++++----- tests/integration/test_replay_recordings.py | 1 + 5 files changed, 189 insertions(+), 170 deletions(-) delete mode 100644 relay-replays/src/video.rs diff --git a/relay-replays/src/lib.rs b/relay-replays/src/lib.rs index c7f3555081..b9c718385e 100644 --- a/relay-replays/src/lib.rs +++ b/relay-replays/src/lib.rs @@ -15,4 +15,3 @@ pub mod recording; mod transform; -pub mod video; diff --git a/relay-replays/src/video.rs b/relay-replays/src/video.rs deleted file mode 100644 index f786d4753e..0000000000 --- a/relay-replays/src/video.rs +++ /dev/null @@ -1,85 +0,0 @@ -//! ReplayVideo validator. -//! -//! ReplayVideo envelope-item types contain headers which must be present to complete processing. -use bytes::Bytes; -use serde::Deserialize; -use serde_json; -use std::fmt; - -/// Validates ReplayVideo envelope-item payloads. -/// -/// # Usage -/// -/// Accepts a borrowed reference to `Bytes` as input. -/// -/// # Errors -/// -/// This function will return errors if: -/// - The message is empty. -/// - If the header seperator character could not be found. -/// - If the headers are invalid. -/// - If the body value is empty. -pub fn validate_replay_video(payload: &Bytes) -> Result<(), VideoError> { - // Validate data was provided. - if payload.is_empty() { - return Err(VideoError::Message("no video message found")); - } - - // Validate we were able to find leading headers. - let mut split = payload.splitn(2, |b| b == &b'\n'); - let header = split - .next() - .ok_or(VideoError::Message("no video headers found"))?; - - // Validate the body contains data. - match split.next() { - Some(b"") | None => return Err(VideoError::Message("no video payload found")), - _ => {} - }; - - // Validate the headers are in the appropriate format. 
- serde_json::from_slice::(header)?; - - Ok(()) -} - -#[allow(dead_code)] -#[derive(Debug, Deserialize)] -struct VideoHeaders { - segment_id: u16, -} - -/// Error returned from [`validate_replay_video`]. -#[derive(Debug)] -pub enum VideoError { - /// An error parsing the JSON payload. - Parse(serde_json::Error), - /// Validation of the payload failed. - /// - /// The body is empty, is missing the headers, or the body. - Message(&'static str), -} - -impl fmt::Display for VideoError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - VideoError::Parse(serde_error) => write!(f, "{serde_error}"), - VideoError::Message(message) => write!(f, "{message}"), - } - } -} - -impl std::error::Error for VideoError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - VideoError::Parse(e) => Some(e), - VideoError::Message(_) => None, - } - } -} - -impl From for VideoError { - fn from(err: serde_json::Error) -> Self { - VideoError::Parse(err) - } -} diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index f558fa83dd..31242d91c7 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -362,9 +362,6 @@ pub enum ProcessingError { #[error("invalid pii config")] PiiConfigError(PiiConfigError), - - #[error("replay envelope was incomplete or partially dropped")] - IncompleteReplayEnvelope, } impl ProcessingError { @@ -405,9 +402,6 @@ impl ProcessingError { // These outcomes are emitted at the source. Self::MissingProjectId => None, Self::EventFiltered(_) => None, - - // Replay - Self::IncompleteReplayEnvelope => None, } } diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 1ab3b70d5e..9f16eeb1fe 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -1,22 +1,21 @@ //! Replay related processor code. 
- use std::error::Error; use std::net::IpAddr; -use bytes::Bytes; use relay_config::Config; use relay_dynamic_config::{Feature, ProjectConfig}; use relay_event_normalization::replay::{self, ReplayError}; use relay_event_normalization::RawUserAgentInfo; use relay_event_schema::processor::{self, ProcessingState}; -use relay_event_schema::protocol::Replay; +use relay_event_schema::protocol::{EventId, Replay}; use relay_pii::PiiProcessor; use relay_protocol::Annotated; use relay_replays::recording::RecordingScrubber; -use relay_replays::video::validate_replay_video; use relay_statsd::metric; +use rmp_serde; +use serde::{Deserialize, Serialize}; -use crate::envelope::{ContentType, ItemType}; +use crate::envelope::{ContentType, Item, ItemType}; use crate::services::outcome::{DiscardReason, Outcome}; use crate::services::processor::{ProcessEnvelopeState, ProcessingError, ReplayGroup}; use crate::statsd::RelayTimers; @@ -55,106 +54,107 @@ pub fn process( let combined_envelope_items = project_state.has_feature(Feature::SessionReplayCombinedEnvelopeItems); - // If any item in the envelope was dropped we record it. - let mut item_dropped = false; - state.managed_envelope.retain_items(|item| { // If replays aren't enabled or an item was dropped - drop the remainder of the // envelope. - if !replays_enabled || item_dropped { + if !replays_enabled { return ItemAction::DropSilently; } + // Set the combined payload header to the value of the combined feature. 
+ item.set_replay_combined_payload(combined_envelope_items); + match item.ty() { ItemType::ReplayEvent => { - if combined_envelope_items { - item.set_replay_combined_payload(true); - } - - match process_replay_event(&item.payload(), project_config, client_addr, user_agent) - { - Ok(replay) => match replay.to_json() { - Ok(json) => { - item.set_payload(ContentType::Json, json); - ItemAction::Keep - } - Err(error) => { - relay_log::error!( - error = &error as &dyn Error, - "failed to serialize replay" - ); - ItemAction::Keep - } - }, - Err(error) => { - item_dropped = true; - relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); - ItemAction::Drop(Outcome::Invalid(match error { - ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, - ReplayError::CouldNotScrub(_) => DiscardReason::InvalidReplayEventPii, - ReplayError::CouldNotParse(_) => DiscardReason::InvalidReplayEvent, - ReplayError::InvalidPayload(_) => DiscardReason::InvalidReplayEvent, - })) + match handle_replay_event_item( + &item.payload(), + project_config, + client_addr, + user_agent, + ) { + ProcessingAction::Drop(action) => action, + ProcessingAction::Keep => ItemAction::Keep, + ProcessingAction::Replace(replay_event) => { + item.set_payload(ContentType::Json, replay_event); + ItemAction::Keep } } } ItemType::ReplayRecording => { - if combined_envelope_items { - item.set_replay_combined_payload(true); - } - - // XXX: Processing is there just for data scrubbing. Skip the entire expensive - // processing step if we do not need to scrub. - if !scrubbing_enabled || scrubber.is_empty() { - return ItemAction::Keep; - } - - // Limit expansion of recordings to the max replay size. The payload is - // decompressed temporarily and then immediately re-compressed. However, to - // limit memory pressure, we use the replay limit as a good overall limit for - // allocations. 
- let parsed_recording = metric!(timer(RelayTimers::ReplayRecordingProcessing), { - scrubber.process_recording(&item.payload()) - }); - - match parsed_recording { - Ok(recording) => { - item.set_payload(ContentType::OctetStream, recording); + match handle_replay_recording_item( + &item.payload(), + &event_id, + scrubbing_enabled, + &mut scrubber, + ) { + ProcessingAction::Drop(action) => action, + ProcessingAction::Keep => ItemAction::Keep, + ProcessingAction::Replace(replay_recording) => { + item.set_payload(ContentType::OctetStream, replay_recording); ItemAction::Keep } - Err(e) => { - item_dropped = true; - relay_log::warn!("replay-recording-event: {e} {event_id:?}"); - ItemAction::Drop(Outcome::Invalid( - DiscardReason::InvalidReplayRecordingEvent, - )) - } } } - ItemType::ReplayVideo => match validate_replay_video(&item.payload()) { - Ok(()) => ItemAction::Keep, - Err(e) => { - item_dropped = true; - relay_log::warn!("could not parse video headers: {e} {event_id:?}"); - ItemAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayVideoEvent)) + ItemType::ReplayVideo => match handle_replay_video_item( + item, + &event_id, + project_config, + client_addr, + user_agent, + scrubbing_enabled, + &mut scrubber, + ) { + ProcessingAction::Drop(action) => action, + ProcessingAction::Keep => ItemAction::Keep, + ProcessingAction::Replace(replay_video) => { + item.set_payload(ContentType::OctetStream, replay_video); + ItemAction::Keep } }, _ => ItemAction::Keep, } }); - // If an item was dropped we emitted one failure outcome for the failing item and skipped - // the rest. We return an error to exit processing early. - if item_dropped { - return Err(ProcessingError::IncompleteReplayEnvelope); - } - Ok(()) } +enum ProcessingAction { + Drop(ItemAction), + Keep, + Replace(T), +} + +// Replay Event Processing. 
+ +fn handle_replay_event_item( + payload: &[u8], + config: &ProjectConfig, + client_ip: Option, + user_agent: &RawUserAgentInfo<&str>, +) -> ProcessingAction { + match process_replay_event(payload, config, client_ip, user_agent) { + Ok(replay) => match replay.to_json() { + Ok(json) => ProcessingAction::Replace(json), + Err(error) => { + relay_log::error!(error = &error as &dyn Error, "failed to serialize replay"); + ProcessingAction::Keep + } + }, + Err(error) => { + relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); + ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid(match error { + ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, + ReplayError::CouldNotScrub(_) => DiscardReason::InvalidReplayEventPii, + ReplayError::CouldNotParse(_) => DiscardReason::InvalidReplayEvent, + ReplayError::InvalidPayload(_) => DiscardReason::InvalidReplayEvent, + }))) + } + } +} + /// Validates, normalizes, and scrubs PII from a replay event. fn process_replay_event( - payload: &Bytes, + payload: &[u8], config: &ProjectConfig, client_ip: Option, user_agent: &RawUserAgentInfo<&str>, @@ -187,3 +187,113 @@ fn process_replay_event( Ok(replay) } + +// Replay Recording Processing + +fn handle_replay_recording_item( + payload: &[u8], + event_id: &Option, + scrubbing_enabled: bool, + scrubber: &mut RecordingScrubber, +) -> ProcessingAction> { + // XXX: Processing is there just for data scrubbing. Skip the entire expensive + // processing step if we do not need to scrub. + if !scrubbing_enabled || scrubber.is_empty() { + return ProcessingAction::Keep; + } + + // Limit expansion of recordings to the max replay size. The payload is + // decompressed temporarily and then immediately re-compressed. However, to + // limit memory pressure, we use the replay limit as a good overall limit for + // allocations. 
+ let parsed_recording = metric!(timer(RelayTimers::ReplayRecordingProcessing), { + scrubber.process_recording(payload) + }); + + match parsed_recording { + Ok(recording) => ProcessingAction::Replace(recording), + Err(e) => { + relay_log::warn!("replay-recording-event: {e} {event_id:?}"); + ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( + DiscardReason::InvalidReplayRecordingEvent, + ))) + } + } +} + +// Replay Video Processing + +#[derive(Debug, Deserialize, Serialize)] +struct ReplayVideoEvent { + replay_event: Vec, + replay_recording: Vec, + replay_video: Vec, +} + +fn handle_replay_video_item( + item: &mut Item, + event_id: &Option, + config: &ProjectConfig, + client_ip: Option, + user_agent: &RawUserAgentInfo<&str>, + scrubbing_enabled: bool, + scrubber: &mut RecordingScrubber, +) -> ProcessingAction> { + let event: ReplayVideoEvent = match rmp_serde::from_slice(&item.payload()) { + Ok(result) => result, + Err(e) => { + relay_log::warn!("replay-video-event: {e} {event_id:?}"); + return ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( + DiscardReason::InvalidReplayVideoEvent, + ))); + } + }; + + // Process as a replay-event envelope item. + let replay_event = + match handle_replay_event_item(&event.replay_event, config, client_ip, user_agent) { + ProcessingAction::Drop(action) => { + return ProcessingAction::Drop(action); + } + ProcessingAction::Keep => event.replay_event, + ProcessingAction::Replace(msg) => msg.as_bytes().to_vec(), + }; + + // Process as a replay-recording envelope item. + let replay_recording = match handle_replay_recording_item( + &event.replay_recording, + event_id, + scrubbing_enabled, + scrubber, + ) { + ProcessingAction::Drop(action) => { + return ProcessingAction::Drop(action); + } + ProcessingAction::Keep => event.replay_recording, + ProcessingAction::Replace(msg) => msg, + }; + + // Verify the replay-video payload is not empty. 
+ if event.replay_video.is_empty() { + return ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( + DiscardReason::InvalidReplayVideoEvent, + ))); + } + + // Because values were borrowed from the initial struct we take the newly copied + let out_event = ReplayVideoEvent { + replay_event, + replay_recording, + replay_video: event.replay_video, + }; + + match rmp_serde::to_vec(&out_event) { + Ok(vec) => ProcessingAction::Replace(vec), + Err(e) => { + relay_log::warn!("replay-video-event: {e} {event_id:?}"); + ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( + DiscardReason::InvalidReplayVideoEvent, + ))) + } + } +} diff --git a/tests/integration/test_replay_recordings.py b/tests/integration/test_replay_recordings.py index 7c036e7b99..2eb1f7f54e 100644 --- a/tests/integration/test_replay_recordings.py +++ b/tests/integration/test_replay_recordings.py @@ -92,6 +92,7 @@ def test_replay_recording_with_video( replay_id, ], ["attachment_type", "replay_recording"], + ["attachment_type", "replay_video"], ] ) payload = recording_payload(b"[]") From 279660e7a0a92782a3cc399290f012a3b589f823 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 20 Feb 2024 13:12:25 -0600 Subject: [PATCH 54/72] Set processing items on the envelope item's headers --- relay-server/src/envelope.rs | 17 ++++++++++++++ relay-server/src/services/processor/replay.rs | 22 ++++--------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index b84b102014..858e6a210b 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -513,6 +513,11 @@ pub struct ItemHeaders { #[serde(default, skip)] replay_combined_payload: bool, + /// The parsed replay-event and replay-recording payloads. + /// NOTE: This is internal-only and not exposed into the Envelope. 
+ #[serde(default, skip)] + replay_video_events: Option<(Vec, Vec)>, + /// Contains the amount of events this item was generated and aggregated from. /// /// A [metrics buckets](`ItemType::MetricBuckets`) item contains metrics extracted and @@ -601,6 +606,7 @@ impl Item { routing_hint: None, rate_limited: false, replay_combined_payload: false, + replay_video_events: None, source_quantities: None, sample_rates: None, other: BTreeMap::new(), @@ -777,6 +783,17 @@ impl Item { self.headers.replay_combined_payload = combined_payload; } + /// Returns the payload's replay video events. + #[cfg(feature = "processing")] + pub fn replay_video_events(&self) -> Option<(Vec, Vec)> { + self.headers.replay_video_events.clone() + } + + /// Set the replay video events attribute for this item. + pub fn set_replay_video_events(&mut self, replay_event: Vec, replay_recording: Vec) { + self.headers.replay_video_events = Some((replay_event, replay_recording)); + } + /// Sets sample rates for this item. pub fn set_sample_rates(&mut self, sample_rates: Value) { if matches!(sample_rates, Value::Array(ref a) if !a.is_empty()) { diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 9f16eeb1fe..ec6fed7c9b 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -106,7 +106,8 @@ pub fn process( ) { ProcessingAction::Drop(action) => action, ProcessingAction::Keep => ItemAction::Keep, - ProcessingAction::Replace(replay_video) => { + ProcessingAction::Replace((replay_event, replay_recording, replay_video)) => { + item.set_replay_video_events(replay_event, replay_recording); item.set_payload(ContentType::OctetStream, replay_video); ItemAction::Keep } @@ -238,7 +239,7 @@ fn handle_replay_video_item( user_agent: &RawUserAgentInfo<&str>, scrubbing_enabled: bool, scrubber: &mut RecordingScrubber, -) -> ProcessingAction> { +) -> ProcessingAction<(Vec, Vec, Vec)> { let event: 
ReplayVideoEvent = match rmp_serde::from_slice(&item.payload()) { Ok(result) => result, Err(e) => { @@ -280,20 +281,5 @@ fn handle_replay_video_item( ))); } - // Because values were borrowed from the initial struct we take the newly copied - let out_event = ReplayVideoEvent { - replay_event, - replay_recording, - replay_video: event.replay_video, - }; - - match rmp_serde::to_vec(&out_event) { - Ok(vec) => ProcessingAction::Replace(vec), - Err(e) => { - relay_log::warn!("replay-video-event: {e} {event_id:?}"); - ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( - DiscardReason::InvalidReplayVideoEvent, - ))) - } - } + ProcessingAction::Replace((replay_event, replay_recording, event.replay_video)) } From c2227d4b31527f7530abb1006883d7350f35d1e5 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 20 Feb 2024 15:52:14 -0600 Subject: [PATCH 55/72] Update store to accept combined ReplayVideo item type --- relay-server/src/services/store.rs | 61 ++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 77b292e5fb..17e1283d11 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -197,7 +197,6 @@ impl StoreService { let mut replay_event = None; let mut replay_recording = None; - let mut replay_video = None; for item in envelope.items() { match item.ty() { @@ -247,7 +246,26 @@ impl StoreService { start_time, item, )?, - ItemType::ReplayVideo => replay_video = Some(item), + ItemType::ReplayVideo => { + // ReplayVideo item types set their headers in the processor with a special + // replay_video_events field. This is done to save us from serializing the + // payload in the processor and then deserializing the message in this stage. 
+ if let Some((event, recording)) = item.replay_video_events() { + // ReplayVideo item types always produce the replay-event onto the + // replay-recording Kafka topic regardless of the value of + // "SessionReplayCombinedEnvelopeItems" feature which applies to legacy + // events. + self.produce_replay_recording( + event_id, + scoping, + Bytes::from(recording), + Some(Bytes::from(event)), + Some(item.payload()), + start_time, + retention, + )?; + } + } ItemType::ReplayRecording => { replay_recording = Some(item); } @@ -262,7 +280,7 @@ impl StoreService { scoping.project_id, start_time, retention, - item, + item.payload(), )?; } ItemType::CheckIn => self.produce_check_in( @@ -281,12 +299,17 @@ impl StoreService { } if let Some(recording) = replay_recording { + // If a recording item type was seen we produce it to Kafka with the replay-event + // payload (should it have been provided). + // + // The replay_video value is always specified as `None`. We do not allow separate + // item types for `ReplayVideo` events. self.produce_replay_recording( event_id, scoping, - recording, - replay_event, - replay_video, + recording.payload(), + replay_event.map(|rv| rv.payload()), + None, start_time, retention, )?; @@ -771,14 +794,14 @@ impl StoreService { project_id: ProjectId, start_time: Instant, retention_days: u16, - item: &Item, + payload: Bytes, ) -> Result<(), StoreError> { let message = ReplayEventKafkaMessage { replay_id, project_id, retention_days, start_time: UnixTimestamp::from_instant(start_time).as_secs(), - payload: item.payload(), + payload: payload, }; self.produce( KafkaTopic::ReplayEvents, @@ -797,25 +820,21 @@ impl StoreService { &self, event_id: Option, scoping: Scoping, - item: &Item, - replay_event: Option<&Item>, - replay_video: Option<&Item>, + payload: Bytes, + replay_event: Option, + replay_video: Option, start_time: Instant, retention: u16, ) -> Result<(), StoreError> { - // Map the event and video items to their byte payload values. 
- let replay_event_payload = replay_event.map(|rv| rv.payload()); - let replay_video_payload = replay_video.map(|rv| rv.payload()); - // Maximum number of bytes accepted by the consumer. let max_payload_size = self.config.max_replay_message_size(); // Size of the consumer message. We can be reasonably sure this won't overflow because // of the request size validation provided by Nginx and Relay. let mut payload_size = 2000; // Reserve 2KB for the message metadata. - payload_size += replay_event_payload.as_ref().map_or(0, |b| b.len()); - payload_size += replay_video_payload.as_ref().map_or(0, |b| b.len()); - payload_size += item.payload().len(); + payload_size += replay_event.as_ref().map_or(0, |b| b.len()); + payload_size += replay_video.as_ref().map_or(0, |b| b.len()); + payload_size += payload.len(); // If the recording payload can not fit in to the message do not produce and quit early. if payload_size >= max_payload_size { @@ -840,9 +859,9 @@ impl StoreService { org_id: scoping.organization_id, received: UnixTimestamp::from_instant(start_time).as_secs(), retention_days: retention, - payload: item.payload(), - replay_event: replay_event_payload, - replay_video: replay_video_payload, + payload, + replay_event, + replay_video, }); self.produce( From 7db085eb3efa77d8a500f9f48d6d9bfb0bf59fe6 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 20 Feb 2024 19:37:58 -0600 Subject: [PATCH 56/72] Fix lint --- relay-server/src/services/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 17e1283d11..774e302a79 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -801,7 +801,7 @@ impl StoreService { project_id, retention_days, start_time: UnixTimestamp::from_instant(start_time).as_secs(), - payload: payload, + payload, }; self.produce( KafkaTopic::ReplayEvents, From cb024ce581eca4597de719c87870d47fc28991a7 Mon Sep 17 00:00:00 2001 
From: Colton Allen Date: Tue, 20 Feb 2024 20:27:58 -0600 Subject: [PATCH 57/72] Add ReplayVideo to ReplayGroup --- relay-server/src/services/processor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 31242d91c7..7a73ff6ef5 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -204,7 +204,7 @@ impl ProcessingGroup { let replay_items = envelope.take_items_by(|item| { matches!( item.ty(), - &ItemType::ReplayEvent | &ItemType::ReplayRecording + &ItemType::ReplayEvent | &ItemType::ReplayRecording | &ItemType::ReplayVideo ) }); if !replay_items.is_empty() { From 8ffa74afb64cd4ccd8787e187cb2916fc99f50a4 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 20 Feb 2024 20:28:15 -0600 Subject: [PATCH 58/72] Add serde_bytes --- Cargo.lock | 10 ++++++++++ relay-server/Cargo.toml | 1 + relay-server/src/services/processor/replay.rs | 3 +++ 3 files changed, 14 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 5a507863a0..41d6859ca3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4122,6 +4122,7 @@ dependencies = [ "rmp-serde", "rust-embed", "serde", + "serde_bytes", "serde_json", "serde_path_to_error", "similar-asserts", @@ -4708,6 +4709,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "serde_bytes" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b8497c313fd43ab992087548117643f6fcd935cbf36f176ffda0aacf9591734" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" version = "1.0.189" diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index d80ca6b087..56c383368c 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -97,6 +97,7 @@ reqwest = { version = "0.11.1", features = [ rmp-serde = "1.1.1" rust-embed = { version = "8.0.0", optional = true } serde = { workspace = true } +serde_bytes = { version = "0.11.14" } 
serde_json = { workspace = true } smallvec = { workspace = true, features = ["drain_filter"] } sqlx = { version = "0.7.3", features = [ diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index ec6fed7c9b..ee917edeaf 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -226,8 +226,11 @@ fn handle_replay_recording_item( #[derive(Debug, Deserialize, Serialize)] struct ReplayVideoEvent { + #[serde(with = "serde_bytes")] replay_event: Vec, + #[serde(with = "serde_bytes")] replay_recording: Vec, + #[serde(with = "serde_bytes")] replay_video: Vec, } From 3d81a063d1f71635787bd8d917d18b0e22e7fbba Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 20 Feb 2024 20:28:24 -0600 Subject: [PATCH 59/72] Add test_replay_videos test module --- tests/integration/test_replay_recordings.py | 46 -------------- tests/integration/test_replay_videos.py | 69 +++++++++++++++++++++ 2 files changed, 69 insertions(+), 46 deletions(-) create mode 100644 tests/integration/test_replay_videos.py diff --git a/tests/integration/test_replay_recordings.py b/tests/integration/test_replay_recordings.py index 2eb1f7f54e..2ca221e50a 100644 --- a/tests/integration/test_replay_recordings.py +++ b/tests/integration/test_replay_recordings.py @@ -72,52 +72,6 @@ def test_nonchunked_replay_recordings_processing( outcomes_consumer.assert_empty() -def test_replay_recording_with_video( - mini_sentry, relay_with_processing, replay_recordings_consumer, outcomes_consumer -): - project_id = 42 - org_id = 0 - replay_id = "515539018c9b4260a6f999572f1661ee" - relay = relay_with_processing() - mini_sentry.add_basic_project_config( - project_id, extra={"config": {"features": ["organizations:session-replay"]}} - ) - replay_recordings_consumer = replay_recordings_consumer() - outcomes_consumer = outcomes_consumer() - - envelope = Envelope( - headers=[ - [ - "event_id", - replay_id, - ], - ["attachment_type", 
"replay_recording"], - ["attachment_type", "replay_video"], - ] - ) - payload = recording_payload(b"[]") - envelope.add_item(Item(payload=PayloadRef(bytes=payload), type="replay_recording")) - envelope.add_item( - Item(payload=PayloadRef(bytes=b"hello, world!"), type="replay_video") - ) - - relay.send_envelope(project_id, envelope) - - # Get the non-chunked replay-recording message from the kafka queue. - replay_recording = replay_recordings_consumer.get_not_chunked_replay(timeout=10) - assert replay_recording["replay_id"] == replay_id - assert replay_recording["project_id"] == project_id - assert replay_recording["key_id"] == 123 - assert replay_recording["org_id"] == org_id - assert type(replay_recording["received"]) == int - assert replay_recording["retention_days"] == 90 - assert replay_recording["payload"] == payload - assert replay_recording["type"] == "replay_recording_not_chunked" - assert replay_recording["replay_video"] == b"hello, world!" - - outcomes_consumer.assert_empty() - - def recording_payload(bits: bytes): compressed_payload = zlib.compress(bits) return b'{"segment_id": 0}\n' + compressed_payload diff --git a/tests/integration/test_replay_videos.py b/tests/integration/test_replay_videos.py new file mode 100644 index 0000000000..002005f59c --- /dev/null +++ b/tests/integration/test_replay_videos.py @@ -0,0 +1,69 @@ +from .test_replay_events import generate_replay_sdk_event +from .test_replay_recordings import recording_payload +from sentry_sdk.envelope import Envelope, Item, PayloadRef + +import msgpack +import json + + +def test_replay_recording_with_video( + mini_sentry, + relay_with_processing, + replay_recordings_consumer, + outcomes_consumer, +): + project_id = 42 + org_id = 0 + replay_id = "515539018c9b4260a6f999572f1661ee" + relay = relay_with_processing() + mini_sentry.add_basic_project_config( + project_id, extra={"config": {"features": ["organizations:session-replay"]}} + ) + replay_recordings_consumer = replay_recordings_consumer() + 
outcomes_consumer = outcomes_consumer() + + _recording_payload = recording_payload(b"[]") + payload = msgpack.packb( + { + "replay_event": json.dumps(generate_replay_sdk_event(replay_id)).encode(), + "replay_recording": _recording_payload, + "replay_video": b"hello, world!", + } + ) + + envelope = Envelope( + headers=[ + [ + "event_id", + replay_id, + ], + ["attachment_type", "replay_video"], + ] + ) + envelope.add_item(Item(payload=PayloadRef(bytes=payload), type="replay_video")) + + relay.send_envelope(project_id, envelope) + + # Get the non-chunked replay-recording message from the kafka queue. + replay_recording = replay_recordings_consumer.get_not_chunked_replay(timeout=10) + + # Assert the recording payload appears normally. + assert replay_recording["replay_id"] == replay_id + assert replay_recording["project_id"] == project_id + assert replay_recording["key_id"] == 123 + assert replay_recording["org_id"] == org_id + assert type(replay_recording["received"]) == int + assert replay_recording["retention_days"] == 90 + assert replay_recording["payload"] == _recording_payload + assert replay_recording["type"] == "replay_recording_not_chunked" + assert replay_recording["replay_event"] is not None + + # Assert the replay-video bytes were published to the consumer. + assert replay_recording["replay_video"] == b"hello, world!" + + # Assert the replay-event bytes were published to the consumer. 
+ replay_event = json.loads(replay_recording["replay_event"]) + assert replay_event["type"] == "replay_event" + assert replay_event["replay_id"] == "515539018c9b4260a6f999572f1661ee" + + outcomes_consumer.assert_empty() From b49f9d5d507f0d8f1d64d25a9ebe7ae6646cc21f Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 20 Feb 2024 20:36:36 -0600 Subject: [PATCH 60/72] Remove unused import --- tests/integration/test_replay_recordings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_replay_recordings.py b/tests/integration/test_replay_recordings.py index 2ca221e50a..3f4d8c9a15 100644 --- a/tests/integration/test_replay_recordings.py +++ b/tests/integration/test_replay_recordings.py @@ -1,4 +1,3 @@ -import pytest import zlib from sentry_sdk.envelope import Envelope, Item, PayloadRef From bed786b962f06c261a0ff2608d6086ec9ee054ba Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 21 Feb 2024 08:04:38 -0600 Subject: [PATCH 61/72] Pass drop as an Outcome and allow the outerscope to manage how that outcome is handled --- relay-server/src/services/processor/replay.rs | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index ee917edeaf..67fe24a04f 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -72,7 +72,7 @@ pub fn process( client_addr, user_agent, ) { - ProcessingAction::Drop(action) => action, + ProcessingAction::Drop(outcome) => ItemAction::Drop(outcome), ProcessingAction::Keep => ItemAction::Keep, ProcessingAction::Replace(replay_event) => { item.set_payload(ContentType::Json, replay_event); @@ -87,7 +87,7 @@ pub fn process( scrubbing_enabled, &mut scrubber, ) { - ProcessingAction::Drop(action) => action, + ProcessingAction::Drop(outcome) => ItemAction::Drop(outcome), ProcessingAction::Keep => ItemAction::Keep, 
ProcessingAction::Replace(replay_recording) => { item.set_payload(ContentType::OctetStream, replay_recording); @@ -104,7 +104,7 @@ pub fn process( scrubbing_enabled, &mut scrubber, ) { - ProcessingAction::Drop(action) => action, + ProcessingAction::Drop(outcome) => ItemAction::Drop(outcome), ProcessingAction::Keep => ItemAction::Keep, ProcessingAction::Replace((replay_event, replay_recording, replay_video)) => { item.set_replay_video_events(replay_event, replay_recording); @@ -120,7 +120,7 @@ pub fn process( } enum ProcessingAction { - Drop(ItemAction), + Drop(Outcome), Keep, Replace(T), } @@ -143,12 +143,12 @@ fn handle_replay_event_item( }, Err(error) => { relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); - ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid(match error { + ProcessingAction::Drop(Outcome::Invalid(match error { ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, ReplayError::CouldNotScrub(_) => DiscardReason::InvalidReplayEventPii, ReplayError::CouldNotParse(_) => DiscardReason::InvalidReplayEvent, ReplayError::InvalidPayload(_) => DiscardReason::InvalidReplayEvent, - }))) + })) } } } @@ -215,9 +215,7 @@ fn handle_replay_recording_item( Ok(recording) => ProcessingAction::Replace(recording), Err(e) => { relay_log::warn!("replay-recording-event: {e} {event_id:?}"); - ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( - DiscardReason::InvalidReplayRecordingEvent, - ))) + ProcessingAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayRecordingEvent)) } } } @@ -247,9 +245,9 @@ fn handle_replay_video_item( Ok(result) => result, Err(e) => { relay_log::warn!("replay-video-event: {e} {event_id:?}"); - return ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( + return ProcessingAction::Drop(Outcome::Invalid( DiscardReason::InvalidReplayVideoEvent, - ))); + )); } }; @@ -279,9 +277,7 @@ fn handle_replay_video_item( // Verify the replay-video payload is not empty. 
if event.replay_video.is_empty() { - return ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( - DiscardReason::InvalidReplayVideoEvent, - ))); + return ProcessingAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayVideoEvent)); } ProcessingAction::Replace((replay_event, replay_recording, event.replay_video)) From c95f6a495f7a2a575de6574a55d584a508b07828 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 22 Feb 2024 07:19:46 -0600 Subject: [PATCH 62/72] Revert "Pass drop as an Outcome and allow the outerscope to manage how that outcome is handled" This reverts commit bed786b962f06c261a0ff2608d6086ec9ee054ba. --- relay-server/src/services/processor/replay.rs | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index 67fe24a04f..ee917edeaf 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -72,7 +72,7 @@ pub fn process( client_addr, user_agent, ) { - ProcessingAction::Drop(outcome) => ItemAction::Drop(outcome), + ProcessingAction::Drop(action) => action, ProcessingAction::Keep => ItemAction::Keep, ProcessingAction::Replace(replay_event) => { item.set_payload(ContentType::Json, replay_event); @@ -87,7 +87,7 @@ pub fn process( scrubbing_enabled, &mut scrubber, ) { - ProcessingAction::Drop(outcome) => ItemAction::Drop(outcome), + ProcessingAction::Drop(action) => action, ProcessingAction::Keep => ItemAction::Keep, ProcessingAction::Replace(replay_recording) => { item.set_payload(ContentType::OctetStream, replay_recording); @@ -104,7 +104,7 @@ pub fn process( scrubbing_enabled, &mut scrubber, ) { - ProcessingAction::Drop(outcome) => ItemAction::Drop(outcome), + ProcessingAction::Drop(action) => action, ProcessingAction::Keep => ItemAction::Keep, ProcessingAction::Replace((replay_event, replay_recording, replay_video)) => { item.set_replay_video_events(replay_event, 
replay_recording); @@ -120,7 +120,7 @@ pub fn process( } enum ProcessingAction { - Drop(Outcome), + Drop(ItemAction), Keep, Replace(T), } @@ -143,12 +143,12 @@ fn handle_replay_event_item( }, Err(error) => { relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); - ProcessingAction::Drop(Outcome::Invalid(match error { + ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid(match error { ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, ReplayError::CouldNotScrub(_) => DiscardReason::InvalidReplayEventPii, ReplayError::CouldNotParse(_) => DiscardReason::InvalidReplayEvent, ReplayError::InvalidPayload(_) => DiscardReason::InvalidReplayEvent, - })) + }))) } } } @@ -215,7 +215,9 @@ fn handle_replay_recording_item( Ok(recording) => ProcessingAction::Replace(recording), Err(e) => { relay_log::warn!("replay-recording-event: {e} {event_id:?}"); - ProcessingAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayRecordingEvent)) + ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( + DiscardReason::InvalidReplayRecordingEvent, + ))) } } } @@ -245,9 +247,9 @@ fn handle_replay_video_item( Ok(result) => result, Err(e) => { relay_log::warn!("replay-video-event: {e} {event_id:?}"); - return ProcessingAction::Drop(Outcome::Invalid( + return ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( DiscardReason::InvalidReplayVideoEvent, - )); + ))); } }; @@ -277,7 +279,9 @@ fn handle_replay_video_item( // Verify the replay-video payload is not empty. 
if event.replay_video.is_empty() { - return ProcessingAction::Drop(Outcome::Invalid(DiscardReason::InvalidReplayVideoEvent)); + return ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( + DiscardReason::InvalidReplayVideoEvent, + ))); } ProcessingAction::Replace((replay_event, replay_recording, event.replay_video)) From e6e193c8136e83e0206950cb0822b347c80bde55 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 22 Feb 2024 14:20:10 +0100 Subject: [PATCH 63/72] ref(replays): video item (#3149) Serialize processed replay video item back into the envelope instead of putting it on the header. #skip-changelog --- relay-server/Cargo.toml | 5 +- relay-server/src/envelope.rs | 17 --- relay-server/src/services/processor/replay.rs | 122 +++++++----------- relay-server/src/services/store.rs | 93 +++++++++---- 4 files changed, 116 insertions(+), 121 deletions(-) diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index 56c383368c..947594dd77 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -23,7 +23,6 @@ processing = [ "dep:symbolic-common", "dep:symbolic-unreal", "dep:zstd", - "bytes/serde", "relay-cardinality/redis", "relay-config/processing", "relay-kafka/producer", @@ -47,7 +46,7 @@ axum-server = "0.4.7" backoff = "0.4.0" brotli = "3.3.4" bytecount = "0.6.0" -bytes = { version = "1.4.0" } +bytes = { version = "1.4.0", features = ["serde"] } chrono = { workspace = true, features = ["clock"] } data-encoding = "2.3.3" flate2 = "1.0.19" @@ -97,7 +96,7 @@ reqwest = { version = "0.11.1", features = [ rmp-serde = "1.1.1" rust-embed = { version = "8.0.0", optional = true } serde = { workspace = true } -serde_bytes = { version = "0.11.14" } +serde_bytes = "0.11" serde_json = { workspace = true } smallvec = { workspace = true, features = ["drain_filter"] } sqlx = { version = "0.7.3", features = [ diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 858e6a210b..b84b102014 100644 --- a/relay-server/src/envelope.rs +++ 
b/relay-server/src/envelope.rs @@ -513,11 +513,6 @@ pub struct ItemHeaders { #[serde(default, skip)] replay_combined_payload: bool, - /// The parsed replay-event and replay-recording payloads. - /// NOTE: This is internal-only and not exposed into the Envelope. - #[serde(default, skip)] - replay_video_events: Option<(Vec, Vec)>, - /// Contains the amount of events this item was generated and aggregated from. /// /// A [metrics buckets](`ItemType::MetricBuckets`) item contains metrics extracted and @@ -606,7 +601,6 @@ impl Item { routing_hint: None, rate_limited: false, replay_combined_payload: false, - replay_video_events: None, source_quantities: None, sample_rates: None, other: BTreeMap::new(), @@ -783,17 +777,6 @@ impl Item { self.headers.replay_combined_payload = combined_payload; } - /// Returns the payload's replay video events. - #[cfg(feature = "processing")] - pub fn replay_video_events(&self) -> Option<(Vec, Vec)> { - self.headers.replay_video_events.clone() - } - - /// Set the replay video events attribute for this item. - pub fn set_replay_video_events(&mut self, replay_event: Vec, replay_recording: Vec) { - self.headers.replay_video_events = Some((replay_event, replay_recording)); - } - /// Sets sample rates for this item. 
pub fn set_sample_rates(&mut self, sample_rates: Value) { if matches!(sample_rates, Value::Array(ref a) if !a.is_empty()) { diff --git a/relay-server/src/services/processor/replay.rs b/relay-server/src/services/processor/replay.rs index ee917edeaf..62dd459a39 100644 --- a/relay-server/src/services/processor/replay.rs +++ b/relay-server/src/services/processor/replay.rs @@ -2,6 +2,7 @@ use std::error::Error; use std::net::IpAddr; +use bytes::Bytes; use relay_config::Config; use relay_dynamic_config::{Feature, ProjectConfig}; use relay_event_normalization::replay::{self, ReplayError}; @@ -15,7 +16,7 @@ use relay_statsd::metric; use rmp_serde; use serde::{Deserialize, Serialize}; -use crate::envelope::{ContentType, Item, ItemType}; +use crate::envelope::{ContentType, ItemType}; use crate::services::outcome::{DiscardReason, Outcome}; use crate::services::processor::{ProcessEnvelopeState, ProcessingError, ReplayGroup}; use crate::statsd::RelayTimers; @@ -67,14 +68,13 @@ pub fn process( match item.ty() { ItemType::ReplayEvent => { match handle_replay_event_item( - &item.payload(), + item.payload(), project_config, client_addr, user_agent, ) { - ProcessingAction::Drop(action) => action, - ProcessingAction::Keep => ItemAction::Keep, - ProcessingAction::Replace(replay_event) => { + Err(outcome) => ItemAction::Drop(outcome), + Ok(replay_event) => { item.set_payload(ContentType::Json, replay_event); ItemAction::Keep } @@ -82,21 +82,20 @@ pub fn process( } ItemType::ReplayRecording => { match handle_replay_recording_item( - &item.payload(), + item.payload(), &event_id, scrubbing_enabled, &mut scrubber, ) { - ProcessingAction::Drop(action) => action, - ProcessingAction::Keep => ItemAction::Keep, - ProcessingAction::Replace(replay_recording) => { + Err(outcome) => ItemAction::Drop(outcome), + Ok(replay_recording) => { item.set_payload(ContentType::OctetStream, replay_recording); ItemAction::Keep } } } ItemType::ReplayVideo => match handle_replay_video_item( - item, + 
item.payload(), &event_id, project_config, client_addr, @@ -104,11 +103,9 @@ pub fn process( scrubbing_enabled, &mut scrubber, ) { - ProcessingAction::Drop(action) => action, - ProcessingAction::Keep => ItemAction::Keep, - ProcessingAction::Replace((replay_event, replay_recording, replay_video)) => { - item.set_replay_video_events(replay_event, replay_recording); - item.set_payload(ContentType::OctetStream, replay_video); + Err(outcome) => ItemAction::Drop(outcome), + Ok(payload) => { + item.set_payload(ContentType::OctetStream, payload); ItemAction::Keep } }, @@ -119,36 +116,30 @@ pub fn process( Ok(()) } -enum ProcessingAction { - Drop(ItemAction), - Keep, - Replace(T), -} - // Replay Event Processing. fn handle_replay_event_item( - payload: &[u8], + payload: Bytes, config: &ProjectConfig, client_ip: Option, user_agent: &RawUserAgentInfo<&str>, -) -> ProcessingAction { - match process_replay_event(payload, config, client_ip, user_agent) { +) -> Result { + match process_replay_event(&payload, config, client_ip, user_agent) { Ok(replay) => match replay.to_json() { - Ok(json) => ProcessingAction::Replace(json), + Ok(json) => Ok(json.into_bytes().into()), Err(error) => { relay_log::error!(error = &error as &dyn Error, "failed to serialize replay"); - ProcessingAction::Keep + Ok(payload) } }, Err(error) => { relay_log::warn!(error = &error as &dyn Error, "invalid replay event"); - ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid(match error { + Err(Outcome::Invalid(match error { ReplayError::NoContent => DiscardReason::InvalidReplayEventNoPayload, ReplayError::CouldNotScrub(_) => DiscardReason::InvalidReplayEventPii, ReplayError::CouldNotParse(_) => DiscardReason::InvalidReplayEvent, ReplayError::InvalidPayload(_) => DiscardReason::InvalidReplayEvent, - }))) + })) } } } @@ -192,15 +183,15 @@ fn process_replay_event( // Replay Recording Processing fn handle_replay_recording_item( - payload: &[u8], + payload: Bytes, event_id: &Option, scrubbing_enabled: bool, 
scrubber: &mut RecordingScrubber, -) -> ProcessingAction> { +) -> Result { // XXX: Processing is there just for data scrubbing. Skip the entire expensive // processing step if we do not need to scrub. if !scrubbing_enabled || scrubber.is_empty() { - return ProcessingAction::Keep; + return Ok(payload); } // Limit expansion of recordings to the max replay size. The payload is @@ -208,16 +199,14 @@ fn handle_replay_recording_item( // limit memory pressure, we use the replay limit as a good overall limit for // allocations. let parsed_recording = metric!(timer(RelayTimers::ReplayRecordingProcessing), { - scrubber.process_recording(payload) + scrubber.process_recording(&payload) }); match parsed_recording { - Ok(recording) => ProcessingAction::Replace(recording), + Ok(recording) => Ok(recording.into()), Err(e) => { relay_log::warn!("replay-recording-event: {e} {event_id:?}"); - ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( - DiscardReason::InvalidReplayRecordingEvent, - ))) + Err(Outcome::Invalid(DiscardReason::InvalidReplayRecordingEvent)) } } } @@ -226,63 +215,50 @@ fn handle_replay_recording_item( #[derive(Debug, Deserialize, Serialize)] struct ReplayVideoEvent { - #[serde(with = "serde_bytes")] - replay_event: Vec, - #[serde(with = "serde_bytes")] - replay_recording: Vec, - #[serde(with = "serde_bytes")] - replay_video: Vec, + replay_event: Bytes, + replay_recording: Bytes, + replay_video: Bytes, } fn handle_replay_video_item( - item: &mut Item, + payload: Bytes, event_id: &Option, config: &ProjectConfig, client_ip: Option, user_agent: &RawUserAgentInfo<&str>, scrubbing_enabled: bool, scrubber: &mut RecordingScrubber, -) -> ProcessingAction<(Vec, Vec, Vec)> { - let event: ReplayVideoEvent = match rmp_serde::from_slice(&item.payload()) { +) -> Result { + let ReplayVideoEvent { + replay_event, + replay_recording, + replay_video, + } = match rmp_serde::from_slice(&payload) { Ok(result) => result, Err(e) => { relay_log::warn!("replay-video-event: {e} 
{event_id:?}"); - return ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( - DiscardReason::InvalidReplayVideoEvent, - ))); + return Err(Outcome::Invalid(DiscardReason::InvalidReplayVideoEvent)); } }; // Process as a replay-event envelope item. - let replay_event = - match handle_replay_event_item(&event.replay_event, config, client_ip, user_agent) { - ProcessingAction::Drop(action) => { - return ProcessingAction::Drop(action); - } - ProcessingAction::Keep => event.replay_event, - ProcessingAction::Replace(msg) => msg.as_bytes().to_vec(), - }; + let replay_event = handle_replay_event_item(replay_event, config, client_ip, user_agent)?; // Process as a replay-recording envelope item. - let replay_recording = match handle_replay_recording_item( - &event.replay_recording, - event_id, - scrubbing_enabled, - scrubber, - ) { - ProcessingAction::Drop(action) => { - return ProcessingAction::Drop(action); - } - ProcessingAction::Keep => event.replay_recording, - ProcessingAction::Replace(msg) => msg, - }; + let replay_recording = + handle_replay_recording_item(replay_recording, event_id, scrubbing_enabled, scrubber)?; // Verify the replay-video payload is not empty. 
- if event.replay_video.is_empty() { - return ProcessingAction::Drop(ItemAction::Drop(Outcome::Invalid( - DiscardReason::InvalidReplayVideoEvent, - ))); + if replay_video.is_empty() { + return Err(Outcome::Invalid(DiscardReason::InvalidReplayVideoEvent)); } - ProcessingAction::Replace((replay_event, replay_recording, event.replay_video)) + match rmp_serde::to_vec_named(&ReplayVideoEvent { + replay_event, + replay_recording, + replay_video, + }) { + Ok(payload) => Ok(payload.into()), + Err(_) => Err(Outcome::Invalid(DiscardReason::InvalidReplayVideoEvent)), + } } diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 774e302a79..a2a4560231 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -247,24 +247,13 @@ impl StoreService { item, )?, ItemType::ReplayVideo => { - // ReplayVideo item types set their headers in the processor with a special - // replay_video_events field. This is done to save us from serializing the - // payload in the processor and then deserializing the message in this stage. - if let Some((event, recording)) = item.replay_video_events() { - // ReplayVideo item types always produce the replay-event onto the - // replay-recording Kafka topic regardless of the value of - // "SessionReplayCombinedEnvelopeItems" feature which applies to legacy - // events. - self.produce_replay_recording( - event_id, - scoping, - Bytes::from(recording), - Some(Bytes::from(event)), - Some(item.payload()), - start_time, - retention, - )?; - } + self.produce_replay_video( + event_id, + scoping, + item.payload(), + start_time, + retention, + )?; } ItemType::ReplayRecording => { replay_recording = Some(item); @@ -304,11 +293,12 @@ impl StoreService { // // The replay_video value is always specified as `None`. We do not allow separate // item types for `ReplayVideo` events. 
+ let replay_event = replay_event.map(|rv| rv.payload()); self.produce_replay_recording( event_id, scoping, - recording.payload(), - replay_event.map(|rv| rv.payload()), + &recording.payload(), + replay_event.as_deref(), None, start_time, retention, @@ -820,9 +810,9 @@ impl StoreService { &self, event_id: Option, scoping: Scoping, - payload: Bytes, - replay_event: Option, - replay_video: Option, + payload: &[u8], + replay_event: Option<&[u8]>, + replay_video: Option<&[u8]>, start_time: Instant, retention: u16, ) -> Result<(), StoreError> { @@ -878,6 +868,50 @@ impl StoreService { Ok(()) } + fn produce_replay_video( + &self, + event_id: Option, + scoping: Scoping, + payload: Bytes, + start_time: Instant, + retention: u16, + ) -> Result<(), StoreError> { + #[derive(Deserialize)] + struct VideoEvent<'a> { + replay_event: &'a [u8], + replay_recording: &'a [u8], + replay_video: &'a [u8], + } + + let Ok(VideoEvent { + replay_video, + replay_event, + replay_recording, + }) = rmp_serde::from_slice::(&payload) + else { + self.outcome_aggregator.send(TrackOutcome { + category: DataCategory::Replay, + event_id, + outcome: Outcome::Invalid(DiscardReason::InvalidReplayEvent), + quantity: 1, + remote_addr: None, + scoping, + timestamp: instant_to_date_time(start_time), + }); + return Ok(()); + }; + + self.produce_replay_recording( + event_id, + scoping, + replay_recording, + Some(replay_event), + Some(replay_video), + start_time, + retention, + ) + } + fn produce_check_in( &self, organization_id: u64, @@ -1171,16 +1205,19 @@ struct ReplayRecordingKafkaMessage { } #[derive(Debug, Serialize)] -struct ReplayRecordingNotChunkedKafkaMessage { +struct ReplayRecordingNotChunkedKafkaMessage<'a> { replay_id: EventId, key_id: Option, org_id: u64, project_id: ProjectId, received: u64, retention_days: u16, - payload: Bytes, - replay_event: Option, - replay_video: Option, + #[serde(with = "serde_bytes")] + payload: &'a [u8], + #[serde(with = "serde_bytes")] + replay_event: Option<&'a [u8]>, 
+ #[serde(with = "serde_bytes")] + replay_video: Option<&'a [u8]>, } /// User report for an event wrapped up in a message ready for consumption in Kafka. @@ -1363,7 +1400,7 @@ enum KafkaMessage<'a> { }, Profile(ProfileKafkaMessage), ReplayEvent(ReplayEventKafkaMessage), - ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage), + ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage<'a>), CheckIn(CheckInKafkaMessage), Span(SpanKafkaMessage<'a>), } From 22d2091731da1332af1038b6b1b7967f8d3c9dc1 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 22 Feb 2024 14:04:06 -0600 Subject: [PATCH 64/72] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 854c44d0da..2f02bc3ce7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Extend GPU context with data for Unreal Engine crash reports. ([#3144](https://github.com/getsentry/relay/pull/3144)) - Parametrize transaction in dynamic sampling context. ([#3141](https://github.com/getsentry/relay/pull/3141)) +- Adds ReplayVideo envelope-item type. ([#3105](https://github.com/getsentry/relay/pull/3105)) **Bug Fixes**: @@ -28,7 +29,6 @@ **Features**: - Add protobuf support for ingesting OpenTelemetry spans and use official `opentelemetry-proto` generated structs. ([#3044](https://github.com/getsentry/relay/pull/3044)) -- Adds ReplayVideo envelope-item type. 
([#3105](https://github.com/getsentry/relay/pull/3105)) **Internal**: From 8dc8503041c0fc9382bd7b9c2a89ef7718d5fe4c Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Fri, 23 Feb 2024 09:44:10 -0600 Subject: [PATCH 65/72] Produce replay-event to the snuba consumer --- relay-server/src/services/store.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index ba3a49a462..2bca97beb0 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -941,6 +941,15 @@ impl StoreService { return Ok(()); }; + self.produce_replay_event( + event_id.ok_or(StoreError::NoEventId)?, + scoping.organization_id, + scoping.project_id, + start_time, + retention, + replay_event, + )?; + self.produce_replay_recording( event_id, scoping, From 152ceb34264cd118ba59cbc4c90bbecead59266b Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Mon, 26 Feb 2024 08:59:07 -0600 Subject: [PATCH 66/72] Create a new Bytes struct with a copy of the replay-event bytes --- relay-server/src/services/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 2bca97beb0..d1f3706088 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -947,7 +947,7 @@ impl StoreService { scoping.project_id, start_time, retention, - replay_event, + Bytes::from(replay_event.to_owned()), )?; self.produce_replay_recording( From 5109bb941662da39e9088d05cc1dd0034990b416 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Mon, 26 Feb 2024 09:17:32 -0600 Subject: [PATCH 67/72] Use copy_from_slice --- relay-server/src/services/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index d1f3706088..7b3cd693cf 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -947,7 +947,7 
@@ impl StoreService { scoping.project_id, start_time, retention, - Bytes::from(replay_event.to_owned()), + Bytes::copy_from_slice(replay_event), )?; self.produce_replay_recording( From 79b066885d8abd9b480e9ced0b497a865db2b650 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Mon, 26 Feb 2024 10:39:13 -0600 Subject: [PATCH 68/72] Add replay-video produced metric --- relay-server/src/services/store.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 7b3cd693cf..d120afa41c 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -958,7 +958,14 @@ impl StoreService { Some(replay_video), start_time, retention, - ) + )?; + + metric!( + counter(RelayCounters::ProcessingMessageProduced) += 1, + event_type = "replay_video" + ); + + Ok(()) } fn produce_check_in( From b5b90e5224c882afa0bd6edda4d8388db2e10728 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Mon, 26 Feb 2024 20:26:54 -0600 Subject: [PATCH 69/72] Abstract replay-event assertion and use in replay-video coverage --- tests/integration/test_replay_events.py | 95 +++++++++++++------------ tests/integration/test_replay_videos.py | 13 +++- 2 files changed, 59 insertions(+), 49 deletions(-) diff --git a/tests/integration/test_replay_events.py b/tests/integration/test_replay_events.py index 820061780a..4adcc731d0 100644 --- a/tests/integration/test_replay_events.py +++ b/tests/integration/test_replay_events.py @@ -46,69 +46,53 @@ def generate_replay_sdk_event(replay_id="d2132d31b39445f1938d7e21b6bf0ec4"): } -def test_replay_event_with_processing( - mini_sentry, relay_with_processing, replay_events_consumer -): - relay = relay_with_processing() - mini_sentry.add_basic_project_config( - 42, extra={"config": {"features": ["organizations:session-replay"]}} - ) - - replay_events_consumer = replay_events_consumer(timeout=10) - replay = generate_replay_sdk_event() - - 
relay.send_replay_event(42, replay) - - replay_event, replay_event_message = replay_events_consumer.get_replay_event() - assert replay_event["type"] == "replay_event" - assert replay_event["replay_id"] == "d2132d31b39445f1938d7e21b6bf0ec4" - assert replay_event_message["retention_days"] == 90 - - parsed_replay = json.loads(bytes(replay_event_message["payload"])) - # Assert required fields were returned. - assert parsed_replay["replay_id"] == replay["replay_id"] - assert parsed_replay["replay_type"] == replay["replay_type"] - assert parsed_replay["event_id"] == replay["event_id"] - assert parsed_replay["type"] == replay["type"] - assert parsed_replay["segment_id"] == replay["segment_id"] - assert parsed_replay["urls"] == replay["urls"] - assert parsed_replay["error_ids"] == replay["error_ids"] - assert parsed_replay["trace_ids"] == replay["trace_ids"] - assert parsed_replay["dist"] == replay["dist"] - assert parsed_replay["platform"] == replay["platform"] - assert parsed_replay["environment"] == replay["environment"] - assert parsed_replay["release"] == str(replay["release"]) - assert parsed_replay["sdk"]["name"] == replay["sdk"]["name"] - assert parsed_replay["sdk"]["version"] == replay["sdk"]["version"] - assert parsed_replay["user"]["id"] == replay["user"]["id"] - assert parsed_replay["user"]["username"] == replay["user"]["username"] - assert parsed_replay["user"]["ip_address"] == replay["user"]["ip_address"] +def assert_replay_payload_matches(produced, consumed): + assert consumed["type"] == "replay_event" + assert consumed["replay_id"] == produced["replay_id"] + assert consumed["replay_type"] == produced["replay_type"] + assert consumed["event_id"] == produced["event_id"] + assert consumed["type"] == produced["type"] + assert consumed["segment_id"] == produced["segment_id"] + assert consumed["urls"] == produced["urls"] + assert consumed["error_ids"] == produced["error_ids"] + assert consumed["trace_ids"] == produced["trace_ids"] + assert consumed["dist"] == 
produced["dist"] + assert consumed["platform"] == produced["platform"] + assert consumed["environment"] == produced["environment"] + assert consumed["release"] == str(produced["release"]) + assert consumed["sdk"]["name"] == produced["sdk"]["name"] + assert consumed["sdk"]["version"] == produced["sdk"]["version"] + assert consumed["user"]["id"] == produced["user"]["id"] + assert consumed["user"]["username"] == produced["user"]["username"] + assert consumed["user"]["ip_address"] == produced["user"]["ip_address"] # Assert PII scrubbing. - assert parsed_replay["user"]["email"] == "[email]" + assert consumed["user"]["email"] == "[email]" # Round to account for float imprecision. Not a big deal. Decimals # are dropped in Clickhouse. - assert int(parsed_replay["replay_start_timestamp"]) == int( - replay["replay_start_timestamp"] + assert int(consumed["replay_start_timestamp"]) == int( + produced["replay_start_timestamp"] ) - assert int(parsed_replay["timestamp"]) == int(replay["timestamp"]) + assert int(consumed["timestamp"]) == int(produced["timestamp"]) # Assert the tags and requests objects were normalized to lists of doubles. - assert parsed_replay["tags"] == [["transaction", replay["tags"]["transaction"]]] - assert parsed_replay["request"] == { - "headers": [["User-Agent", replay["request"]["headers"]["user-Agent"]]] + assert consumed["tags"] == [["transaction", produced["tags"]["transaction"]]] + assert consumed["request"] == { + "headers": [["User-Agent", produced["request"]["headers"]["user-Agent"]]] } # Assert contexts object was pulled out. 
- assert parsed_replay["contexts"] == { + assert consumed["contexts"] == { "browser": {"name": "Safari", "version": "15.5", "type": "browser"}, "device": {"brand": "Apple", "family": "Mac", "model": "Mac", "type": "device"}, "os": {"name": "Mac OS X", "version": ">=10.15.7", "type": "os"}, "replay": { "type": "replay", - "error_sample_rate": replay["contexts"]["replay"]["error_sample_rate"], - "session_sample_rate": replay["contexts"]["replay"]["session_sample_rate"], + "error_sample_rate": produced["contexts"]["replay"]["error_sample_rate"], + "session_sample_rate": produced["contexts"]["replay"][ + "session_sample_rate" + ], }, "trace": { "trace_id": "4c79f60c11214eb38604f4ae0781bfb2", @@ -118,6 +102,23 @@ def test_replay_event_with_processing( } +def test_replay_event_with_processing( + mini_sentry, relay_with_processing, replay_events_consumer +): + relay = relay_with_processing() + mini_sentry.add_basic_project_config( + 42, extra={"config": {"features": ["organizations:session-replay"]}} + ) + + replay_events_consumer = replay_events_consumer(timeout=10) + replay = generate_replay_sdk_event() + + relay.send_replay_event(42, replay) + + replay_event, replay_event_message = replay_events_consumer.get_replay_event() + assert_replay_payload_matches(replay, replay_event) + + def test_replay_events_without_processing(mini_sentry, relay_chain): relay = relay_chain(min_relay_version="latest") diff --git a/tests/integration/test_replay_videos.py b/tests/integration/test_replay_videos.py index 002005f59c..e4fb8dba1b 100644 --- a/tests/integration/test_replay_videos.py +++ b/tests/integration/test_replay_videos.py @@ -1,4 +1,4 @@ -from .test_replay_events import generate_replay_sdk_event +from .test_replay_events import generate_replay_sdk_event, assert_replay_payload_matches from .test_replay_recordings import recording_payload from sentry_sdk.envelope import Envelope, Item, PayloadRef @@ -11,6 +11,7 @@ def test_replay_recording_with_video( relay_with_processing, 
 replay_recordings_consumer,
     outcomes_consumer,
+    replay_events_consumer,
 ):
     project_id = 42
     org_id = 0
@@ -19,13 +20,15 @@ def test_replay_recording_with_video(
     mini_sentry.add_basic_project_config(
         project_id, extra={"config": {"features": ["organizations:session-replay"]}}
     )
+    replay = generate_replay_sdk_event(replay_id)
+    replay_events_consumer = replay_events_consumer(timeout=10)
     replay_recordings_consumer = replay_recordings_consumer()
     outcomes_consumer = outcomes_consumer()
     _recording_payload = recording_payload(b"[]")
     payload = msgpack.packb(
         {
-            "replay_event": json.dumps(generate_replay_sdk_event(replay_id)).encode(),
+            "replay_event": json.dumps(replay).encode(),
             "replay_recording": _recording_payload,
             "replay_video": b"hello, world!",
         }
@@ -66,4 +69,10 @@ def test_replay_recording_with_video(
     assert replay_event["type"] == "replay_event"
     assert replay_event["replay_id"] == "515539018c9b4260a6f999572f1661ee"
 
+    replay_event, _ = replay_events_consumer.get_replay_event()
+    assert_replay_payload_matches(replay, replay_event)
+
+    # Assert all consumers are empty. 
+ replay_recordings_consumer.assert_empty() outcomes_consumer.assert_empty() + replay_events_consumer.assert_empty() From af2cde77ce44934218c7c2df835e192e9c4ce369 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Mon, 26 Feb 2024 20:37:43 -0600 Subject: [PATCH 70/72] Assert retention days --- tests/integration/test_replay_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_replay_events.py b/tests/integration/test_replay_events.py index 4adcc731d0..2c82c3e16d 100644 --- a/tests/integration/test_replay_events.py +++ b/tests/integration/test_replay_events.py @@ -1,4 +1,3 @@ -import json import uuid @@ -116,6 +115,7 @@ def test_replay_event_with_processing( relay.send_replay_event(42, replay) replay_event, replay_event_message = replay_events_consumer.get_replay_event() + assert replay_event_message["retention_days"] == 90 assert_replay_payload_matches(replay, replay_event) From 1d7ffdb4cfe85454f67d8a258ab9299beb517fc5 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 27 Feb 2024 06:29:21 -0600 Subject: [PATCH 71/72] Emit a single metric --- relay-server/src/services/store.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 7aef92d17f..4f4cf0558e 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -900,9 +900,15 @@ impl StoreService { message, )?; + let event_type = if replay_video.is_some() { + "replay_recording_with_video" + } else { + "replay_recording_not_chunked" + }; + metric!( counter(RelayCounters::ProcessingMessageProduced) += 1, - event_type = "replay_recording_not_chunked" + event_type = event_type ); Ok(()) @@ -958,14 +964,7 @@ impl StoreService { Some(replay_video), start_time, retention, - )?; - - metric!( - counter(RelayCounters::ProcessingMessageProduced) += 1, - event_type = "replay_video" - ); - - Ok(()) + ) } fn produce_check_in( From 
45c2bb8601ef3d03cd38930b8c8ad87dedf93d7d Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Tue, 27 Feb 2024 06:44:59 -0600 Subject: [PATCH 72/72] Remove copy --- relay-server/src/services/store.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/relay-server/src/services/store.rs b/relay-server/src/services/store.rs index 4f4cf0558e..7680e3237e 100644 --- a/relay-server/src/services/store.rs +++ b/relay-server/src/services/store.rs @@ -276,7 +276,7 @@ impl StoreService { scoping.project_id, start_time, retention, - item.payload(), + &item.payload(), )?; } ItemType::CheckIn => self.produce_check_in( @@ -824,7 +824,7 @@ impl StoreService { project_id: ProjectId, start_time: Instant, retention_days: u16, - payload: Bytes, + payload: &[u8], ) -> Result<(), StoreError> { let message = ReplayEventKafkaMessage { replay_id, @@ -953,7 +953,7 @@ impl StoreService { scoping.project_id, start_time, retention, - Bytes::copy_from_slice(replay_event), + replay_event, )?; self.produce_replay_recording( @@ -1254,9 +1254,9 @@ struct EventKafkaMessage { } #[derive(Debug, Serialize)] -struct ReplayEventKafkaMessage { +struct ReplayEventKafkaMessage<'a> { /// Raw event payload. - payload: Bytes, + payload: &'a [u8], /// Time at which the event was received by Relay. start_time: u64, /// The event id. @@ -1612,7 +1612,7 @@ enum KafkaMessage<'a> { message: MetricKafkaMessage<'a>, }, Profile(ProfileKafkaMessage), - ReplayEvent(ReplayEventKafkaMessage), + ReplayEvent(ReplayEventKafkaMessage<'a>), ReplayRecordingNotChunked(ReplayRecordingNotChunkedKafkaMessage<'a>), CheckIn(CheckInKafkaMessage), Span(SpanKafkaMessage<'a>),