From 2163520998d0d2ef157ad622d20af9a4a986339c Mon Sep 17 00:00:00 2001 From: David Herberth Date: Mon, 27 Nov 2023 14:45:04 +0100 Subject: [PATCH] feat(metric-meta): Normalize dashes to underscores in metric names --- CHANGELOG.md | 4 ++ relay-base-schema/src/metrics.rs | 39 ++++++++++++---- .../src/normalize/processor.rs | 21 ++++++--- relay-metrics/src/protocol.rs | 45 ++++++++++++++----- 4 files changed, 84 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a29be668583..dc7a714231e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ - `normalize_performance_score` now handles `PerformanceScoreProfile` configs with zero weight components and component weight sums of any number greater than 0. ([#2756](https://github.com/getsentry/relay/pull/2756)) +**Features**: + +- Normalize dashes in metric names to underscores. ([#2769](https://github.com/getsentry/relay/pull/2769)) + **Internal**: - Add support for metric metadata. ([#2751](https://github.com/getsentry/relay/pull/2751)) diff --git a/relay-base-schema/src/metrics.rs b/relay-base-schema/src/metrics.rs index f4089509ba7..d2ebbbf0580 100644 --- a/relay-base-schema/src/metrics.rs +++ b/relay-base-schema/src/metrics.rs @@ -1,21 +1,44 @@ //! Type definitions for Sentry metrics. -use std::fmt; +use std::{borrow::Cow, fmt}; use relay_protocol::{Annotated, Empty, ErrorKind, FromValue, IntoValue, SkipSerialization, Value}; -/// Validates a metric name. This is the statsd name, i.e. without type or unit. +/// Validates a metric name and tries to normalize it. This is the statsd name, i.e. without type or unit. /// /// Metric names cannot be empty, must begin with a letter and can consist of ASCII alphanumerics, -/// underscores, slashes and periods. -pub fn is_valid_metric_name(name: &str) -> bool { +/// underscores, dashes, slashes and periods. +/// +/// Dashes in metric names are normalized to underscores. +pub fn try_normalize_metric_name(name: &str) -> Option> { let mut iter = name.as_bytes().iter(); - if let Some(first_byte) = iter.next() { - if first_byte.is_ascii_alphabetic() { - return iter.all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b'/')); + + let Some(first_byte) = iter.next() else { + // must not be empty + return None; + }; + + if !first_byte.is_ascii_alphabetic() { + return None; + } + + let mut needs_normalization = false; + for &b in iter { + if b == b'-' { + needs_normalization = true; + continue; } + + if !b.is_ascii_alphabetic() && !matches!(b, b'.' | b'_' | b'/') { + return None; + } + } + + if needs_normalization { + Some(Cow::Owned(name.replace('-', "_"))) + } else { + Some(Cow::Borrowed(name)) } - false } /// The unit of measurement of a metric value. diff --git a/relay-event-normalization/src/normalize/processor.rs b/relay-event-normalization/src/normalize/processor.rs index c216ba67cee..56a0d6120f5 100644 --- a/relay-event-normalization/src/normalize/processor.rs +++ b/relay-event-normalization/src/normalize/processor.rs @@ -14,7 +14,9 @@ use std::mem; use std::ops::Range; use chrono::{DateTime, Duration, Utc}; -use relay_base_schema::metrics::{is_valid_metric_name, DurationUnit, FractionUnit, MetricUnit}; +use relay_base_schema::metrics::{ + try_normalize_metric_name, DurationUnit, FractionUnit, MetricUnit, +}; use relay_common::time::UnixTimestamp; use relay_event_schema::processor::{ self, MaxChars, ProcessingAction, ProcessingResult, ProcessingState, Processor, @@ -959,13 +961,13 @@ fn remove_invalid_measurements( None => return false, }; - if !is_valid_metric_name(name) { + let Some(name) = try_normalize_metric_name(name) else { meta.add_error(Error::invalid(format!( "Metric name contains invalid characters: \"{name}\"" ))); removed_measurements.insert(name.clone(), Annotated::new(std::mem::take(measurement))); return false; - } + }; // TODO(jjbayer): Should we actually normalize the unit into the event? let unit = measurement.unit.value().unwrap_or(&MetricUnit::None); @@ -978,15 +980,17 @@ fn remove_invalid_measurements( "Metric name too long {}/{max_name_len}: \"{name}\"", name.len(), ))); - removed_measurements - .insert(name.clone(), Annotated::new(std::mem::take(measurement))); + removed_measurements.insert( + name.into_owned(), + Annotated::new(std::mem::take(measurement)), + ); return false; } } // Check if this is a builtin measurement: for builtin_measurement in measurements_config.builtin_measurement_keys() { - if &builtin_measurement.name == name { + if builtin_measurement.name == name { // If the unit matches a built-in measurement, we allow it. // If the name matches but the unit is wrong, we do not even accept it as a custom measurement, // and just drop it instead. @@ -1001,7 +1005,10 @@ fn remove_invalid_measurements( } meta.add_error(Error::invalid(format!("Too many measurements: {name}"))); - removed_measurements.insert(name.clone(), Annotated::new(std::mem::take(measurement))); + removed_measurements.insert( + name.into_owned(), + Annotated::new(std::mem::take(measurement)), + ); false }); diff --git a/relay-metrics/src/protocol.rs b/relay-metrics/src/protocol.rs index 3b72f11ed45..94e821596d0 100644 --- a/relay-metrics/src/protocol.rs +++ b/relay-metrics/src/protocol.rs @@ -269,14 +269,19 @@ impl<'a> MetricResourceIdentifier<'a> { ) -> Result { let (name_and_namespace, unit) = parse_name_unit(string).ok_or(ParseMetricError(()))?; - let (raw_namespace, name) = name_and_namespace - .split_once('/') - .unwrap_or(("custom", name_and_namespace)); + let (namespace, name) = match name_and_namespace.split_once('/') { + Some((raw_namespace, name)) => (raw_namespace.parse()?, name), + None => (MetricNamespace::Custom, name_and_namespace), + }; + + dbg!(&namespace, &name); + let name = relay_base_schema::metrics::try_normalize_metric_name(name) + .ok_or(ParseMetricError(()))?; Ok(MetricResourceIdentifier { ty, - name: Cow::Borrowed(name), - namespace: raw_namespace.parse()?, + name, + namespace, unit, }) } @@ -350,14 +355,11 @@ pub(crate) fn validate_tag_value(tag_value: &mut String) { /// Parses the `name[@unit]` part of a metric string. /// -/// Returns [`MetricUnit::None`] if no unit is specified. Returns `None` if the name or value are -/// invalid. +/// Returns [`MetricUnit::None`] if no unit is specified. Returns `None` if value is invalid. +/// The name is not normalized. fn parse_name_unit(string: &str) -> Option<(&str, MetricUnit)> { let mut components = string.split('@'); let name = components.next()?; - if !relay_base_schema::metrics::is_valid_metric_name(name) { - return None; - } let unit = match components.next() { Some(s) => s.parse().ok()?, @@ -448,6 +450,29 @@ mod tests { assert!(MetricResourceIdentifier::parse("foo").is_err()); } + #[test] + fn test_invalid_names_should_fail() { + assert!(MetricResourceIdentifier::parse("c:f?o").is_err()); + assert!(MetricResourceIdentifier::parse("c:föo").is_err()); + assert!(MetricResourceIdentifier::parse("c:3fo").is_err()); + assert!(MetricResourceIdentifier::parse("c:custom/f?o").is_err()); + assert!(MetricResourceIdentifier::parse("c:custom/föo").is_err()); + assert!(MetricResourceIdentifier::parse("c:custom/3fo").is_err()); + } + + #[test] + fn test_normalize_dash_to_underscore() { + assert_eq!( + MetricResourceIdentifier::parse("d:foo.bar.blob-size@second").unwrap(), + MetricResourceIdentifier { + ty: MetricType::Distribution, + namespace: MetricNamespace::Custom, + name: "foo.bar.blob_size".into(), + unit: MetricUnit::Duration(DurationUnit::Second), + }, + ); + } + #[test] fn test_deserialize_mri() { assert_eq!(