diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e83f4e10d..4500d92984 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ **Bug Fixes**: +- Use `matches_any_origin` to scrub HTTP hosts in spans. ([#3939](https://github.com/getsentry/relay/pull/3939)). - Keep frames from both ends of the stacktrace when trimming frames. ([#3905](https://github.com/getsentry/relay/pull/3905)) **Features**: diff --git a/Cargo.lock b/Cargo.lock index db2a4dc17d..02799217dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3614,6 +3614,7 @@ dependencies = [ "relay-base-schema", "relay-common", "relay-event-schema", + "relay-filter", "relay-log", "relay-protocol", "relay-statsd", diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 8f84907298..692c9da82b 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -10,7 +10,6 @@ use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; use serde_json::Value; -use url::Host; use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; @@ -192,7 +191,7 @@ pub struct Options { deserialize_with = "default_on_error", skip_serializing_if = "Vec::is_empty" )] - pub http_span_allowed_hosts: Vec, + pub http_span_allowed_hosts: Vec, /// Deprecated, still forwarded for older downstream Relays. #[doc(hidden)] diff --git a/relay-event-normalization/Cargo.toml b/relay-event-normalization/Cargo.toml index 26a62a054a..9f027e5d00 100644 --- a/relay-event-normalization/Cargo.toml +++ b/relay-event-normalization/Cargo.toml @@ -29,6 +29,7 @@ relay-log = { workspace = true } relay-protocol = { workspace = true } relay-statsd = { workspace = true } relay-ua = { workspace = true } +relay-filter = { workspace = true } sentry-release-parser = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index bb859fbcfa..4437415dc3 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -25,7 +25,6 @@ use relay_protocol::{ RemarkType, Value, }; use smallvec::SmallVec; -use url::Host; use uuid::Uuid; use crate::normalize::request; @@ -157,7 +156,7 @@ pub struct NormalizationConfig<'a> { pub replay_id: Option, /// Controls list of hosts to be excluded from scrubbing - pub span_allowed_hosts: &'a [Host], + pub span_allowed_hosts: &'a [String], } impl<'a> Default for NormalizationConfig<'a> { diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 31cc0f0002..9e53e0c5b1 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -3,6 +3,7 @@ mod resource; mod sql; use once_cell::sync::Lazy; use psl; +use relay_filter::matches_any_origin; #[cfg(test)] pub use sql::{scrub_queries, Mode}; @@ -38,7 +39,7 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"]; /// Returns `None` if no scrubbing can be performed. pub(crate) fn scrub_span_description( span: &Span, - span_allowed_hosts: &[Host], + span_allowed_hosts: &[String], ) -> (Option, Option>) { let Some(description) = span.description.as_str() else { return (None, None); @@ -167,7 +168,7 @@ fn scrub_supabase(string: &str) -> Option { Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into()) } -fn scrub_http(string: &str, allow_list: &[Host]) -> Option { +fn scrub_http(string: &str, allow_list: &[String]) -> Option { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { return None; @@ -225,10 +226,15 @@ fn scrub_file(description: &str) -> Option { /// /// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), &[]), "*.bar.baz"); /// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &[]), "127.0.0.1"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[Host::parse("8.8.8.8").unwrap()]), "8.8.8.8"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[String::from("8.8.8.8")]), "8.8.8.8"); /// ``` -pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [Host]) -> Cow<'a, str> { - if allow_list.iter().any(|allowed_host| &host == allowed_host) { +pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> { + let allow_list: Vec<_> = allow_list + .iter() + .map(|origin| origin.as_str().into()) + .collect(); + + if matches_any_origin(Some(host.to_string().as_str()), &allow_list) { return host.to_string().into(); } @@ -534,11 +540,10 @@ fn scrub_function(string: &str) -> Option { #[cfg(test)] mod tests { + use super::*; use relay_protocol::Annotated; use similar_asserts::assert_eq; - use super::*; - macro_rules! span_description_test { // Tests the scrubbed span description for the given op. @@ -1232,4 +1237,51 @@ mod tests { // Can be scrubbed with db system. assert_eq!(scrubbed.0.as_deref(), Some("my-component-name")); } + + #[test] + fn scrub_allowed_host() { + let examples = [ + ( + "https://foo.bar.internal/api/v1/submit", + ["foo.bar.internal".to_string()], + "https://foo.bar.internal", + ), + ( + "http://192.168.1.1:3000", + ["192.168.1.1".to_string()], + "http://192.168.1.1:3000", + ), + ( + "http://[1fff:0:a88:85a3::ac1f]:8001/foo", + ["[1fff:0:a88:85a3::ac1f]".to_string()], + "http://[1fff:0:a88:85a3::ac1f]:8001", + ), + ]; + + for (url, allowed_hosts, expected) in examples { + let json = format!( + r#"{{ + "description": "POST {}", + "span_id": "bd2eb23da2beb459", + "start_timestamp": 1597976393.4619668, + "timestamp": 1597976393.4718769, + "trace_id": "ff62a8b040f340bda5d830223def1d81", + "op": "http.client" + }} + "#, + url, + ); + + let mut span = Annotated::::from_json(&json).unwrap(); + + let scrubbed = + scrub_span_description(span.value_mut().as_mut().unwrap(), &allowed_hosts); + + assert_eq!( + scrubbed.0.as_deref(), + Some(format!("POST {}", expected).as_str()), + "Could not match {url}" + ); + } + } } diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index 9dc15454ac..412de8fb96 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -16,7 +16,7 @@ use relay_event_schema::protocol::{ use relay_protocol::{Annotated, Empty, Value}; use sqlparser::ast::Visit; use sqlparser::ast::{ObjectName, Visitor}; -use url::{Host, Url}; +use url::Url; use crate::span::country_subregion::Subregion; use crate::span::description::{ @@ -184,7 +184,7 @@ impl std::fmt::Display for RenderBlockingStatus { pub(crate) fn extract_span_tags_from_event( event: &mut Event, max_tag_value_size: usize, - http_scrubbing_allow_list: &[Host], + http_scrubbing_allow_list: &[String], ) { // Temporarily take ownership to pass both an event reference and a mutable span reference to `extract_span_tags`. let mut spans = std::mem::take(&mut event.spans); @@ -208,7 +208,7 @@ pub fn extract_span_tags( event: &Event, spans: &mut [Annotated], max_tag_value_size: usize, - span_allowed_hosts: &[Host], + span_allowed_hosts: &[String], ) { // TODO: To prevent differences between metrics and payloads, we should not extract tags here // when they have already been extracted by a downstream relay. @@ -495,7 +495,7 @@ pub fn extract_tags( full_display: Option, is_mobile: bool, start_type: Option<&str>, - span_allowed_hosts: &[Host], + span_allowed_hosts: &[String], ) -> BTreeMap { let mut span_tags: BTreeMap = BTreeMap::new(); diff --git a/relay-filter/src/csp.rs b/relay-filter/src/csp.rs index a980d672d3..7a37a3c56a 100644 --- a/relay-filter/src/csp.rs +++ b/relay-filter/src/csp.rs @@ -81,7 +81,7 @@ impl From<&str> for SchemeDomainPort { (None, url) // no scheme, chop nothing form original string }; - //extract domain:port from the rest of the url + // extract domain:port from the rest of the url let end_domain_idx = rest.find('/'); let domain_port = if let Some(end_domain_idx) = end_domain_idx { &rest[..end_domain_idx] // remove the path from rest @@ -89,8 +89,17 @@ impl From<&str> for SchemeDomainPort { rest // no path, use everything }; - //split the domain and the port - let port_separator_idx = domain_port.find(':'); + // split the domain and the port + let ipv6_end_bracket_idx = rest.rfind(']'); + let port_separator_idx = if let Some(end_bracket_idx) = ipv6_end_bracket_idx { + // we have an ipv6 address, find the port separator after the closing bracket + domain_port[end_bracket_idx..] + .rfind(':') + .map(|x| x + end_bracket_idx) + } else { + // no ipv6 address, find the port separator in the whole string + domain_port.rfind(':') + }; let (domain, port) = if let Some(port_separator_idx) = port_separator_idx { //we have a port separator, split the string into domain and port ( @@ -234,6 +243,39 @@ mod tests { } } + #[test] + fn test_scheme_domain_port_with_ip() { + let examples = &[ + ( + "http://192.168.1.1:3000", + Some("http"), + Some("192.168.1.1"), + Some("3000"), + ), + ("192.168.1.1", None, Some("192.168.1.1"), None), + ("[fd45:7aa3:7ae4::]", None, Some("[fd45:7aa3:7ae4::]"), None), + ("http://172.16.*.*", Some("http"), Some("172.16.*.*"), None), + ( + "http://[1fff:0:a88:85a3::ac1f]:8001", + Some("http"), + Some("[1fff:0:a88:85a3::ac1f]"), + Some("8001"), + ), + ]; + + for (url, scheme, domain, port) in examples { + let actual: SchemeDomainPort = (*url).into(); + assert_eq!( + (actual.scheme, actual.domain, actual.port), + ( + scheme.map(|x| x.to_string()), + domain.map(|x| x.to_string()), + port.map(|x| x.to_string()) + ) + ); + } + } + #[test] fn test_matches_any_origin() { let examples = &[ diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs index 50767e6b26..00eb6e8d85 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -4,7 +4,6 @@ use relay_dynamic_config::Feature; use relay_event_normalization::span::tag_extraction; use relay_event_schema::protocol::{Event, Span}; use relay_protocol::Annotated; -use url::Host; use crate::services::processor::SpanGroup; use crate::{services::processor::ProcessEnvelopeState, utils::ItemAction}; @@ -32,7 +31,7 @@ pub fn filter(state: &mut ProcessEnvelopeState) { pub fn extract_transaction_span( event: &Event, max_tag_value_size: usize, - span_allowed_hosts: &[Host], + span_allowed_hosts: &[String], ) -> Option { let mut spans = [Span::from(event).into()]; diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 6a2e6eee0e..18ae9bb420 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -27,7 +27,6 @@ use relay_protocol::{Annotated, Empty}; use relay_quotas::DataCategory; use relay_spans::otel_trace::Span as OtelSpan; use thiserror::Error; -use url::Host; use crate::envelope::{ContentType, Item, ItemType}; use crate::metrics_extraction::metrics_summary; @@ -339,7 +338,7 @@ struct NormalizeSpanConfig<'a> { /// Client hints parsed from the request. client_hints: ClientHints, /// Hosts that are not replaced by "*" in HTTP span grouping. - allowed_hosts: &'a [Host], + allowed_hosts: &'a [String], } impl<'a> NormalizeSpanConfig<'a> {