Skip to content

Commit

Permalink
fix(dynamic-config): use matches_any_origin to scrub HTTP hosts in …
Browse files Browse the repository at this point in the history
…spans (#3939)

Closes #3936

### Legal Boilerplate

Look, I get it. The entity doing business as "Sentry" was incorporated
in the State of Delaware in 2015 as Functional Software, Inc. and is
gonna need some rights from me in order to utilize my contributions in
this here PR. So here's the deal: I retain all rights, title and
interest in and to my contributions, and by keeping this boilerplate
intact I confirm that Sentry can use, modify, copy, and redistribute my
contributions, under Sentry's choice of terms.

---------

Co-authored-by: Joris Bayer <joris.bayer@sentry.io>
  • Loading branch information
aldy505 and jjbayer committed Sep 3, 2024
1 parent c70c618 commit c367639
Show file tree
Hide file tree
Showing 10 changed files with 115 additions and 22 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

**Bug Fixes**:

- Use `matches_any_origin` to scrub HTTP hosts in spans. ([#3939](https://github.com/getsentry/relay/pull/3939)).
- Keep frames from both ends of the stacktrace when trimming frames. ([#3905](https://github.com/getsentry/relay/pull/3905))

**Features**:
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions relay-dynamic-config/src/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use relay_filter::GenericFiltersConfig;
use relay_quotas::Quota;
use serde::{de, Deserialize, Serialize};
use serde_json::Value;
use url::Host;

use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups};

Expand Down Expand Up @@ -192,7 +191,7 @@ pub struct Options {
deserialize_with = "default_on_error",
skip_serializing_if = "Vec::is_empty"
)]
pub http_span_allowed_hosts: Vec<Host>,
pub http_span_allowed_hosts: Vec<String>,

/// Deprecated, still forwarded for older downstream Relays.
#[doc(hidden)]
Expand Down
1 change: 1 addition & 0 deletions relay-event-normalization/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ relay-log = { workspace = true }
relay-protocol = { workspace = true }
relay-statsd = { workspace = true }
relay-ua = { workspace = true }
relay-filter = { workspace = true }
sentry-release-parser = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
Expand Down
3 changes: 1 addition & 2 deletions relay-event-normalization/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ use relay_protocol::{
RemarkType, Value,
};
use smallvec::SmallVec;
use url::Host;
use uuid::Uuid;

use crate::normalize::request;
Expand Down Expand Up @@ -157,7 +156,7 @@ pub struct NormalizationConfig<'a> {
pub replay_id: Option<Uuid>,

/// Controls list of hosts to be excluded from scrubbing
pub span_allowed_hosts: &'a [Host],
pub span_allowed_hosts: &'a [String],
}

impl<'a> Default for NormalizationConfig<'a> {
Expand Down
66 changes: 59 additions & 7 deletions relay-event-normalization/src/normalize/span/description/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod resource;
mod sql;
use once_cell::sync::Lazy;
use psl;
use relay_filter::matches_any_origin;
#[cfg(test)]
pub use sql::{scrub_queries, Mode};

Expand Down Expand Up @@ -38,7 +39,7 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"];
/// Returns `None` if no scrubbing can be performed.
pub(crate) fn scrub_span_description(
span: &Span,
span_allowed_hosts: &[Host],
span_allowed_hosts: &[String],
) -> (Option<String>, Option<Vec<sqlparser::ast::Statement>>) {
let Some(description) = span.description.as_str() else {
return (None, None);
Expand Down Expand Up @@ -167,7 +168,7 @@ fn scrub_supabase(string: &str) -> Option<String> {
Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into())
}

fn scrub_http(string: &str, allow_list: &[Host]) -> Option<String> {
fn scrub_http(string: &str, allow_list: &[String]) -> Option<String> {
let (method, url) = string.split_once(' ')?;
if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) {
return None;
Expand Down Expand Up @@ -225,10 +226,15 @@ fn scrub_file(description: &str) -> Option<String> {
///
/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), &[]), "*.bar.baz");
/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &[]), "127.0.0.1");
/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[Host::parse("8.8.8.8").unwrap()]), "8.8.8.8");
/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[String::from("8.8.8.8")]), "8.8.8.8");
/// ```
pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [Host]) -> Cow<'a, str> {
if allow_list.iter().any(|allowed_host| &host == allowed_host) {
pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> {
let allow_list: Vec<_> = allow_list
.iter()
.map(|origin| origin.as_str().into())
.collect();

if matches_any_origin(Some(host.to_string().as_str()), &allow_list) {
return host.to_string().into();
}

Expand Down Expand Up @@ -534,11 +540,10 @@ fn scrub_function(string: &str) -> Option<String> {

#[cfg(test)]
mod tests {
use super::*;
use relay_protocol::Annotated;
use similar_asserts::assert_eq;

use super::*;

macro_rules! span_description_test {
// Tests the scrubbed span description for the given op.

Expand Down Expand Up @@ -1232,4 +1237,51 @@ mod tests {
// Can be scrubbed with db system.
assert_eq!(scrubbed.0.as_deref(), Some("my-component-name"));
}

#[test]
fn scrub_allowed_host() {
let examples = [
(
"https://foo.bar.internal/api/v1/submit",
["foo.bar.internal".to_string()],
"https://foo.bar.internal",
),
(
"http://192.168.1.1:3000",
["192.168.1.1".to_string()],
"http://192.168.1.1:3000",
),
(
"http://[1fff:0:a88:85a3::ac1f]:8001/foo",
["[1fff:0:a88:85a3::ac1f]".to_string()],
"http://[1fff:0:a88:85a3::ac1f]:8001",
),
];

for (url, allowed_hosts, expected) in examples {
let json = format!(
r#"{{
"description": "POST {}",
"span_id": "bd2eb23da2beb459",
"start_timestamp": 1597976393.4619668,
"timestamp": 1597976393.4718769,
"trace_id": "ff62a8b040f340bda5d830223def1d81",
"op": "http.client"
}}
"#,
url,
);

let mut span = Annotated::<Span>::from_json(&json).unwrap();

let scrubbed =
scrub_span_description(span.value_mut().as_mut().unwrap(), &allowed_hosts);

assert_eq!(
scrubbed.0.as_deref(),
Some(format!("POST {}", expected).as_str()),
"Could not match {url}"
);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use relay_event_schema::protocol::{
use relay_protocol::{Annotated, Empty, Value};
use sqlparser::ast::Visit;
use sqlparser::ast::{ObjectName, Visitor};
use url::{Host, Url};
use url::Url;

use crate::span::country_subregion::Subregion;
use crate::span::description::{
Expand Down Expand Up @@ -184,7 +184,7 @@ impl std::fmt::Display for RenderBlockingStatus {
pub(crate) fn extract_span_tags_from_event(
event: &mut Event,
max_tag_value_size: usize,
http_scrubbing_allow_list: &[Host],
http_scrubbing_allow_list: &[String],
) {
// Temporarily take ownership to pass both an event reference and a mutable span reference to `extract_span_tags`.
let mut spans = std::mem::take(&mut event.spans);
Expand All @@ -208,7 +208,7 @@ pub fn extract_span_tags(
event: &Event,
spans: &mut [Annotated<Span>],
max_tag_value_size: usize,
span_allowed_hosts: &[Host],
span_allowed_hosts: &[String],
) {
// TODO: To prevent differences between metrics and payloads, we should not extract tags here
// when they have already been extracted by a downstream relay.
Expand Down Expand Up @@ -495,7 +495,7 @@ pub fn extract_tags(
full_display: Option<Timestamp>,
is_mobile: bool,
start_type: Option<&str>,
span_allowed_hosts: &[Host],
span_allowed_hosts: &[String],
) -> BTreeMap<SpanTagKey, String> {
let mut span_tags: BTreeMap<SpanTagKey, String> = BTreeMap::new();

Expand Down
48 changes: 45 additions & 3 deletions relay-filter/src/csp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,25 @@ impl From<&str> for SchemeDomainPort {
(None, url) // no scheme, chop nothing form original string
};

//extract domain:port from the rest of the url
// extract domain:port from the rest of the url
let end_domain_idx = rest.find('/');
let domain_port = if let Some(end_domain_idx) = end_domain_idx {
&rest[..end_domain_idx] // remove the path from rest
} else {
rest // no path, use everything
};

//split the domain and the port
let port_separator_idx = domain_port.find(':');
// split the domain and the port
let ipv6_end_bracket_idx = rest.rfind(']');
let port_separator_idx = if let Some(end_bracket_idx) = ipv6_end_bracket_idx {
// we have an ipv6 address, find the port separator after the closing bracket
domain_port[end_bracket_idx..]
.rfind(':')
.map(|x| x + end_bracket_idx)
} else {
// no ipv6 address, find the port separator in the whole string
domain_port.rfind(':')
};
let (domain, port) = if let Some(port_separator_idx) = port_separator_idx {
//we have a port separator, split the string into domain and port
(
Expand Down Expand Up @@ -234,6 +243,39 @@ mod tests {
}
}

#[test]
fn test_scheme_domain_port_with_ip() {
let examples = &[
(
"http://192.168.1.1:3000",
Some("http"),
Some("192.168.1.1"),
Some("3000"),
),
("192.168.1.1", None, Some("192.168.1.1"), None),
("[fd45:7aa3:7ae4::]", None, Some("[fd45:7aa3:7ae4::]"), None),
("http://172.16.*.*", Some("http"), Some("172.16.*.*"), None),
(
"http://[1fff:0:a88:85a3::ac1f]:8001",
Some("http"),
Some("[1fff:0:a88:85a3::ac1f]"),
Some("8001"),
),
];

for (url, scheme, domain, port) in examples {
let actual: SchemeDomainPort = (*url).into();
assert_eq!(
(actual.scheme, actual.domain, actual.port),
(
scheme.map(|x| x.to_string()),
domain.map(|x| x.to_string()),
port.map(|x| x.to_string())
)
);
}
}

#[test]
fn test_matches_any_origin() {
let examples = &[
Expand Down
3 changes: 1 addition & 2 deletions relay-server/src/services/processor/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use relay_dynamic_config::Feature;
use relay_event_normalization::span::tag_extraction;
use relay_event_schema::protocol::{Event, Span};
use relay_protocol::Annotated;
use url::Host;

use crate::services::processor::SpanGroup;
use crate::{services::processor::ProcessEnvelopeState, utils::ItemAction};
Expand Down Expand Up @@ -32,7 +31,7 @@ pub fn filter(state: &mut ProcessEnvelopeState<SpanGroup>) {
pub fn extract_transaction_span(
event: &Event,
max_tag_value_size: usize,
span_allowed_hosts: &[Host],
span_allowed_hosts: &[String],
) -> Option<Span> {
let mut spans = [Span::from(event).into()];

Expand Down
3 changes: 1 addition & 2 deletions relay-server/src/services/processor/span/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ use relay_protocol::{Annotated, Empty};
use relay_quotas::DataCategory;
use relay_spans::otel_trace::Span as OtelSpan;
use thiserror::Error;
use url::Host;

use crate::envelope::{ContentType, Item, ItemType};
use crate::metrics_extraction::metrics_summary;
Expand Down Expand Up @@ -339,7 +338,7 @@ struct NormalizeSpanConfig<'a> {
/// Client hints parsed from the request.
client_hints: ClientHints<String>,
/// Hosts that are not replaced by "*" in HTTP span grouping.
allowed_hosts: &'a [Host],
allowed_hosts: &'a [String],
}

impl<'a> NormalizeSpanConfig<'a> {
Expand Down

0 comments on commit c367639

Please sign in to comment.