diff --git a/CHANGELOG.md b/CHANGELOG.md index d57f876328..0d62c35a7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Extend GPU context with data for Unreal Engine crash reports. ([#3144](https://github.com/getsentry/relay/pull/3144)) - Parametrize transaction in dynamic sampling context. ([#3141](https://github.com/getsentry/relay/pull/3141)) +- Parse & scrub span description for supabase. ([#3153](https://github.com/getsentry/relay/pull/3153)) **Bug Fixes**: diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 2aa6bbc8fe..da659dc564 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -13,7 +13,8 @@ use relay_event_schema::protocol::Span; use url::Url; use crate::regexes::{ - DB_SQL_TRANSACTION_CORE_DATA_REGEX, REDIS_COMMAND_REGEX, RESOURCE_NORMALIZER_REGEX, + DB_SQL_TRANSACTION_CORE_DATA_REGEX, DB_SUPABASE_REGEX, REDIS_COMMAND_REGEX, + RESOURCE_NORMALIZER_REGEX, }; use crate::span::description::resource::COMMON_PATH_SEGMENTS; use crate::span::tag_extraction::HTTP_METHOD_EXTRACTOR_REGEX; @@ -70,6 +71,11 @@ pub(crate) fn scrub_span_description( // The description will only contain the entity queried and // the query type ("User find" for example). Some(description.to_owned()) + } else if span_origin == Some("auto.db.supabase") { + // The description only contains the table name, e.g. `from(users)`. + // In the future, we might want to parse `data.query` as well. 
+ // See https://github.com/supabase-community/sentry-integration-js/blob/master/index.js#L259 + scrub_supabase(description) } else { let (scrubbed, mode) = sql::scrub_queries(db_system, description); if let sql::Mode::Parsed(ast) = mode { @@ -141,6 +147,13 @@ fn scrub_core_data(string: &str) -> Option<String> { } } +fn scrub_supabase(string: &str) -> Option<String> { + match DB_SUPABASE_REGEX.replace_all(string, "{%s}") { + Cow::Owned(scrubbed) => Some(scrubbed), + Cow::Borrowed(_) => None, + } +} + fn scrub_http(string: &str) -> Option<String> { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index a3c99289f4..44a78e7325 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -365,6 +365,12 @@ pub fn extract_tags( } else { None } + } else if span.origin.as_str() == Some("auto.db.supabase") { + scrubbed_description.as_deref().map(|s| { + s.trim_start_matches("from(") + .trim_end_matches(')') + .to_owned() + }) } else if span_op.starts_with("db") { span.description .value() @@ -1428,4 +1434,44 @@ LIMIT 1 Some(&"Chrome".to_string()) ); } + + #[test] + fn supabase() { + let json = r#"{ + "description": "from(my_table00)", + "op": "db.select", + "origin": "auto.db.supabase", + "data": { + "query": [ + "select(*,other(*))", + "in(something, (value1,value2))" + ] + } + }"#; + + let span = Annotated::<Span>::from_json(json) + .unwrap() + .into_value() + .unwrap(); + + let tags = extract_tags( + &span, + &Config { + max_tag_value_size: 200, + }, + None, + None, + false, + None, + ); + + assert_eq!( + tags.get(&SpanTagKey::Description).map(String::as_str), + Some("from(my_table{%s})") + ); + assert_eq!( + tags.get(&SpanTagKey::Domain).map(String::as_str), + Some("my_table{%s}") + ); + } } diff --git 
a/relay-event-normalization/src/regexes.rs b/relay-event-normalization/src/regexes.rs index 76d5439f76..b273f1ef07 100644 --- a/relay-event-normalization/src/regexes.rs +++ b/relay-event-normalization/src/regexes.rs @@ -81,3 +81,17 @@ pub static RESOURCE_NORMALIZER_REGEX: Lazy<Regex> = Lazy::new(|| { pub static DB_SQL_TRANSACTION_CORE_DATA_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?P<int>\d+)").unwrap()); + +pub static DB_SUPABASE_REGEX: Lazy<Regex> = Lazy::new(|| { + Regex::new( + r"(?xi) + # UUIDs. + (?P<uuid>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) | + # Hexadecimal strings with more than 5 digits. + (?P<hex>[a-f0-9]{5}[a-f0-9]+) | + # Integer IDs with more than one digit. + (?P<int>\d\d+) + ", + ) + .unwrap() +});