From c3676394428c4246e3d3cbb538b01b8fbd05a127 Mon Sep 17 00:00:00 2001 From: Reinaldy Rafli Date: Mon, 2 Sep 2024 21:23:24 +0700 Subject: [PATCH] fix(dynamic-config): use `matches_any_origin` to scrub HTTP hosts in spans (#3939) Closes https://github.com/getsentry/relay/issues/3936 ### Legal Boilerplate Look, I get it. The entity doing business as "Sentry" was incorporated in the State of Delaware in 2015 as Functional Software, Inc. and is gonna need some rights from me in order to utilize my contributions in this here PR. So here's the deal: I retain all rights, title and interest in and to my contributions, and by keeping this boilerplate intact I confirm that Sentry can use, modify, copy, and redistribute my contributions, under Sentry's choice of terms. --------- Co-authored-by: Joris Bayer --- CHANGELOG.md | 1 + Cargo.lock | 1 + relay-dynamic-config/src/global.rs | 3 +- relay-event-normalization/Cargo.toml | 1 + relay-event-normalization/src/event.rs | 3 +- .../src/normalize/span/description/mod.rs | 66 +++++++++++++++++-- .../src/normalize/span/tag_extraction.rs | 8 +-- relay-filter/src/csp.rs | 48 +++++++++++++- relay-server/src/services/processor/span.rs | 3 +- .../src/services/processor/span/processing.rs | 3 +- 10 files changed, 115 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e83f4e10d..4500d92984 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ **Bug Fixes**: +- Use `matches_any_origin` to scrub HTTP hosts in spans. ([#3939](https://github.com/getsentry/relay/pull/3939)). - Keep frames from both ends of the stacktrace when trimming frames. ([#3905](https://github.com/getsentry/relay/pull/3905)) **Features**: diff --git a/Cargo.lock b/Cargo.lock index db2a4dc17d..02799217dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3614,6 +3614,7 @@ dependencies = [ "relay-base-schema", "relay-common", "relay-event-schema", + "relay-filter", "relay-log", "relay-protocol", "relay-statsd", diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 8f84907298..692c9da82b 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -10,7 +10,6 @@ use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; use serde_json::Value; -use url::Host; use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; @@ -192,7 +191,7 @@ pub struct Options { deserialize_with = "default_on_error", skip_serializing_if = "Vec::is_empty" )] - pub http_span_allowed_hosts: Vec, + pub http_span_allowed_hosts: Vec, /// Deprecated, still forwarded for older downstream Relays. #[doc(hidden)] diff --git a/relay-event-normalization/Cargo.toml b/relay-event-normalization/Cargo.toml index 26a62a054a..9f027e5d00 100644 --- a/relay-event-normalization/Cargo.toml +++ b/relay-event-normalization/Cargo.toml @@ -29,6 +29,7 @@ relay-log = { workspace = true } relay-protocol = { workspace = true } relay-statsd = { workspace = true } relay-ua = { workspace = true } +relay-filter = { workspace = true } sentry-release-parser = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index bb859fbcfa..4437415dc3 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -25,7 +25,6 @@ use relay_protocol::{ RemarkType, Value, }; use smallvec::SmallVec; -use url::Host; use uuid::Uuid; use crate::normalize::request; @@ -157,7 +156,7 @@ pub struct NormalizationConfig<'a> { pub replay_id: Option, /// Controls list of hosts to be excluded from scrubbing - pub span_allowed_hosts: &'a [Host], + pub span_allowed_hosts: &'a [String], } impl<'a> Default for NormalizationConfig<'a> { diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 31cc0f0002..9e53e0c5b1 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -3,6 +3,7 @@ mod resource; mod sql; use once_cell::sync::Lazy; use psl; +use relay_filter::matches_any_origin; #[cfg(test)] pub use sql::{scrub_queries, Mode}; @@ -38,7 +39,7 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"]; /// Returns `None` if no scrubbing can be performed. pub(crate) fn scrub_span_description( span: &Span, - span_allowed_hosts: &[Host], + span_allowed_hosts: &[String], ) -> (Option, Option>) { let Some(description) = span.description.as_str() else { return (None, None); @@ -167,7 +168,7 @@ fn scrub_supabase(string: &str) -> Option { Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into()) } -fn scrub_http(string: &str, allow_list: &[Host]) -> Option { +fn scrub_http(string: &str, allow_list: &[String]) -> Option { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { return None; @@ -225,10 +226,15 @@ fn scrub_file(description: &str) -> Option { /// /// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), &[]), "*.bar.baz"); /// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &[]), "127.0.0.1"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[Host::parse("8.8.8.8").unwrap()]), "8.8.8.8"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[String::from("8.8.8.8")]), "8.8.8.8"); /// ``` -pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [Host]) -> Cow<'a, str> { - if allow_list.iter().any(|allowed_host| &host == allowed_host) { +pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> { + let allow_list: Vec<_> = allow_list + .iter() + .map(|origin| origin.as_str().into()) + .collect(); + + if matches_any_origin(Some(host.to_string().as_str()), &allow_list) { return host.to_string().into(); } @@ -534,11 +540,10 @@ fn scrub_function(string: &str) -> Option { #[cfg(test)] mod tests { + use super::*; use relay_protocol::Annotated; use similar_asserts::assert_eq; - use super::*; - macro_rules! span_description_test { // Tests the scrubbed span description for the given op. @@ -1232,4 +1237,51 @@ mod tests { // Can be scrubbed with db system. assert_eq!(scrubbed.0.as_deref(), Some("my-component-name")); } + + #[test] + fn scrub_allowed_host() { + let examples = [ + ( + "https://foo.bar.internal/api/v1/submit", + ["foo.bar.internal".to_string()], + "https://foo.bar.internal", + ), + ( + "http://192.168.1.1:3000", + ["192.168.1.1".to_string()], + "http://192.168.1.1:3000", + ), + ( + "http://[1fff:0:a88:85a3::ac1f]:8001/foo", + ["[1fff:0:a88:85a3::ac1f]".to_string()], + "http://[1fff:0:a88:85a3::ac1f]:8001", + ), + ]; + + for (url, allowed_hosts, expected) in examples { + let json = format!( + r#"{{ + "description": "POST {}", + "span_id": "bd2eb23da2beb459", + "start_timestamp": 1597976393.4619668, + "timestamp": 1597976393.4718769, + "trace_id": "ff62a8b040f340bda5d830223def1d81", + "op": "http.client" + }} + "#, + url, + ); + + let mut span = Annotated::::from_json(&json).unwrap(); + + let scrubbed = + scrub_span_description(span.value_mut().as_mut().unwrap(), &allowed_hosts); + + assert_eq!( + scrubbed.0.as_deref(), + Some(format!("POST {}", expected).as_str()), + "Could not match {url}" + ); + } + } } diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index 9dc15454ac..412de8fb96 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -16,7 +16,7 @@ use relay_event_schema::protocol::{ use relay_protocol::{Annotated, Empty, Value}; use sqlparser::ast::Visit; use sqlparser::ast::{ObjectName, Visitor}; -use url::{Host, Url}; +use url::Url; use crate::span::country_subregion::Subregion; use crate::span::description::{ @@ -184,7 +184,7 @@ impl std::fmt::Display for RenderBlockingStatus { pub(crate) fn extract_span_tags_from_event( event: &mut Event, max_tag_value_size: usize, - http_scrubbing_allow_list: &[Host], + http_scrubbing_allow_list: &[String], ) { // Temporarily take ownership to pass both an event reference and a mutable span reference to `extract_span_tags`. let mut spans = std::mem::take(&mut event.spans); @@ -208,7 +208,7 @@ pub fn extract_span_tags( event: &Event, spans: &mut [Annotated], max_tag_value_size: usize, - span_allowed_hosts: &[Host], + span_allowed_hosts: &[String], ) { // TODO: To prevent differences between metrics and payloads, we should not extract tags here // when they have already been extracted by a downstream relay. @@ -495,7 +495,7 @@ pub fn extract_tags( full_display: Option, is_mobile: bool, start_type: Option<&str>, - span_allowed_hosts: &[Host], + span_allowed_hosts: &[String], ) -> BTreeMap { let mut span_tags: BTreeMap = BTreeMap::new(); diff --git a/relay-filter/src/csp.rs b/relay-filter/src/csp.rs index a980d672d3..7a37a3c56a 100644 --- a/relay-filter/src/csp.rs +++ b/relay-filter/src/csp.rs @@ -81,7 +81,7 @@ impl From<&str> for SchemeDomainPort { (None, url) // no scheme, chop nothing form original string }; - //extract domain:port from the rest of the url + // extract domain:port from the rest of the url let end_domain_idx = rest.find('/'); let domain_port = if let Some(end_domain_idx) = end_domain_idx { &rest[..end_domain_idx] // remove the path from rest @@ -89,8 +89,17 @@ impl From<&str> for SchemeDomainPort { rest // no path, use everything }; - //split the domain and the port - let port_separator_idx = domain_port.find(':'); + // split the domain and the port + let ipv6_end_bracket_idx = rest.rfind(']'); + let port_separator_idx = if let Some(end_bracket_idx) = ipv6_end_bracket_idx { + // we have an ipv6 address, find the port separator after the closing bracket + domain_port[end_bracket_idx..] + .rfind(':') + .map(|x| x + end_bracket_idx) + } else { + // no ipv6 address, find the port separator in the whole string + domain_port.rfind(':') + }; let (domain, port) = if let Some(port_separator_idx) = port_separator_idx { //we have a port separator, split the string into domain and port ( @@ -234,6 +243,39 @@ mod tests { } } + #[test] + fn test_scheme_domain_port_with_ip() { + let examples = &[ + ( + "http://192.168.1.1:3000", + Some("http"), + Some("192.168.1.1"), + Some("3000"), + ), + ("192.168.1.1", None, Some("192.168.1.1"), None), + ("[fd45:7aa3:7ae4::]", None, Some("[fd45:7aa3:7ae4::]"), None), + ("http://172.16.*.*", Some("http"), Some("172.16.*.*"), None), + ( + "http://[1fff:0:a88:85a3::ac1f]:8001", + Some("http"), + Some("[1fff:0:a88:85a3::ac1f]"), + Some("8001"), + ), + ]; + + for (url, scheme, domain, port) in examples { + let actual: SchemeDomainPort = (*url).into(); + assert_eq!( + (actual.scheme, actual.domain, actual.port), + ( + scheme.map(|x| x.to_string()), + domain.map(|x| x.to_string()), + port.map(|x| x.to_string()) + ) + ); + } + } + #[test] fn test_matches_any_origin() { let examples = &[ diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs index 50767e6b26..00eb6e8d85 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -4,7 +4,6 @@ use relay_dynamic_config::Feature; use relay_event_normalization::span::tag_extraction; use relay_event_schema::protocol::{Event, Span}; use relay_protocol::Annotated; -use url::Host; use crate::services::processor::SpanGroup; use crate::{services::processor::ProcessEnvelopeState, utils::ItemAction}; @@ -32,7 +31,7 @@ pub fn filter(state: &mut ProcessEnvelopeState) { pub fn extract_transaction_span( event: &Event, max_tag_value_size: usize, - span_allowed_hosts: &[Host], + span_allowed_hosts: &[String], ) -> Option { let mut spans = [Span::from(event).into()]; diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 6a2e6eee0e..18ae9bb420 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -27,7 +27,6 @@ use relay_protocol::{Annotated, Empty}; use relay_quotas::DataCategory; use relay_spans::otel_trace::Span as OtelSpan; use thiserror::Error; -use url::Host; use crate::envelope::{ContentType, Item, ItemType}; use crate::metrics_extraction::metrics_summary; @@ -339,7 +338,7 @@ struct NormalizeSpanConfig<'a> { /// Client hints parsed from the request. client_hints: ClientHints, /// Hosts that are not replaced by "*" in HTTP span grouping. - allowed_hosts: &'a [Host], + allowed_hosts: &'a [String], } impl<'a> NormalizeSpanConfig<'a> {