-
Notifications
You must be signed in to change notification settings - Fork 93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(store): Limit custom measurements [INGEST-1621] #1483
Changes from 6 commits
62dce92
dc61d75
8800cce
bef558d
ceac3cd
98539fe
0024d9e
71e8054
2b291c5
77da15d
8c4ac94
d67d074
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,4 +1,5 @@ | ||||||
use std::collections::hash_map::DefaultHasher; | ||||||
use std::collections::BTreeSet; | ||||||
use std::hash::{Hash, Hasher}; | ||||||
use std::mem; | ||||||
use std::sync::Arc; | ||||||
|
@@ -7,9 +8,11 @@ use chrono::{DateTime, Duration, Utc}; | |||||
use itertools::Itertools; | ||||||
use once_cell::sync::OnceCell; | ||||||
use regex::Regex; | ||||||
use relay_common::{DurationUnit, FractionUnit, MetricUnit}; | ||||||
use serde::{Deserialize, Serialize}; | ||||||
use smallvec::SmallVec; | ||||||
|
||||||
use relay_common::{DurationUnit, FractionUnit, MetricUnit}; | ||||||
|
||||||
use super::{schema, transactions, BreakdownsConfig}; | ||||||
use crate::processor::{MaxChars, ProcessValue, ProcessingState, Processor}; | ||||||
use crate::protocol::{ | ||||||
|
@@ -34,6 +37,18 @@ mod stacktrace; | |||||
|
||||||
mod user_agent; | ||||||
|
||||||
/// Configuration for measurements normalization. | ||||||
#[derive(Debug, Default, Clone, Serialize, Deserialize)] | ||||||
#[serde(default, rename_all = "camelCase")] | ||||||
pub struct MeasurementsConfig { | ||||||
/// A list of measurements that are built-in and are not subject to custom measurement limits. | ||||||
#[serde(default, skip_serializing_if = "BTreeSet::<String>::is_empty")] | ||||||
known_measurements: BTreeSet<String>, | ||||||
|
||||||
/// The maximum number of measurements allowed per event that are not known measurements. | ||||||
max_custom_measurements: usize, | ||||||
iker-barriocanal marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
|
||||||
/// Validate fields that go into a `sentry.models.BoundedIntegerField`. | ||||||
fn validate_bounded_integer_field(value: u64) -> ProcessingResult { | ||||||
if value < 2_147_483_647 { | ||||||
|
@@ -221,12 +236,42 @@ fn normalize_breakdowns(event: &mut Event, breakdowns_config: Option<&Breakdowns | |||||
} | ||||||
} | ||||||
|
||||||
/// Ensure measurements interface is only present for transaction events | ||||||
fn normalize_measurements(event: &mut Event) { | ||||||
/// Enforce the limit on custom (user defined) measurements. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit / personal opinion: the function accepts a
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I renamed the function and updated the doc comment to clarify what it does. |
||||||
/// | ||||||
/// Note that [`Measurements`] is a BTreeMap, which means its keys are sorted. | ||||||
iker-barriocanal marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
/// This ensures that for two events with the same measurement keys, the same set of custom | ||||||
/// measurements is retained. | ||||||
/// | ||||||
jjbayer marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
fn filter_custom_measurements( | ||||||
measurements: &mut Measurements, | ||||||
measurements_config: &MeasurementsConfig, | ||||||
) { | ||||||
let mut custom_measurements_count = 0; | ||||||
measurements.retain(|name, _| { | ||||||
// Check if this is a builtin measurement: | ||||||
if measurements_config.known_measurements.contains(name) { | ||||||
return true; | ||||||
} | ||||||
// For custom measurements, check the budget: | ||||||
if custom_measurements_count < measurements_config.max_custom_measurements { | ||||||
custom_measurements_count += 1; | ||||||
return true; | ||||||
} | ||||||
|
||||||
false | ||||||
}); | ||||||
} | ||||||
|
||||||
/// Ensure measurements interface is only present for transaction events. | ||||||
fn normalize_measurements(event: &mut Event, measurements_config: Option<&MeasurementsConfig>) { | ||||||
if event.ty.value() != Some(&EventType::Transaction) { | ||||||
// Only transaction events may have a measurements interface | ||||||
event.measurements = Annotated::empty(); | ||||||
} else if let Some(measurements) = event.measurements.value_mut() { | ||||||
if let Some(measurements_config) = measurements_config { | ||||||
filter_custom_measurements(measurements, measurements_config); | ||||||
} | ||||||
|
||||||
let duration_millis = match (event.start_timestamp.0, event.timestamp.0) { | ||||||
(Some(start), Some(end)) => relay_common::chrono_to_positive_millis(end - start), | ||||||
_ => 0.0, | ||||||
|
@@ -540,6 +585,7 @@ pub struct LightNormalizationConfig<'a> { | |||||
pub received_at: Option<DateTime<Utc>>, | ||||||
pub max_secs_in_past: Option<i64>, | ||||||
pub max_secs_in_future: Option<i64>, | ||||||
pub measurements_config: Option<&'a MeasurementsConfig>, | ||||||
pub breakdowns_config: Option<&'a BreakdownsConfig>, | ||||||
pub normalize_user_agent: Option<bool>, | ||||||
} | ||||||
|
@@ -591,7 +637,7 @@ pub fn light_normalize_event( | |||||
light_normalize_stacktraces(event)?; | ||||||
normalize_exceptions(event)?; // Browser extension filters look at the stacktrace | ||||||
normalize_user_agent(event, config.normalize_user_agent); // Legacy browsers filter | ||||||
normalize_measurements(event); // Measurements are part of the metric extraction | ||||||
normalize_measurements(event, config.measurements_config); // Measurements are part of the metric extraction | ||||||
normalize_breakdowns(event, config.breakdowns_config); // Breakdowns are part of the metric extraction too | ||||||
|
||||||
Ok(()) | ||||||
|
@@ -846,6 +892,7 @@ impl<'a> Processor for NormalizeProcessor<'a> { | |||||
#[cfg(test)] | ||||||
mod tests { | ||||||
use chrono::TimeZone; | ||||||
use serde_json::json; | ||||||
use similar_asserts::assert_eq; | ||||||
|
||||||
use crate::processor::process_value; | ||||||
|
@@ -1671,7 +1718,7 @@ mod tests { | |||||
|
||||||
let mut processor = NormalizeProcessor::new( | ||||||
Arc::new(StoreConfig { | ||||||
grouping_config: Some(serde_json::json!({ | ||||||
grouping_config: Some(json!({ | ||||||
"id": "legacy:1234-12-12".to_string(), | ||||||
})), | ||||||
..Default::default() | ||||||
|
@@ -1870,7 +1917,7 @@ mod tests { | |||||
|
||||||
let mut event = Annotated::<Event>::from_json(json).unwrap().0.unwrap(); | ||||||
|
||||||
normalize_measurements(&mut event); | ||||||
normalize_measurements(&mut event, None); | ||||||
|
||||||
insta::assert_ron_snapshot!(SerializableAnnotated(&Annotated::new(event)), {}, @r###" | ||||||
{ | ||||||
|
@@ -1908,6 +1955,55 @@ mod tests { | |||||
"###); | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_filter_custom_measurements() { | ||||||
let json = r#" | ||||||
{ | ||||||
"type": "transaction", | ||||||
"timestamp": "2021-04-26T08:00:05+0100", | ||||||
"start_timestamp": "2021-04-26T08:00:00+0100", | ||||||
"measurements": { | ||||||
"my_custom_measurement_1": {"value": 123}, | ||||||
"frames_slow": {"value": 1}, | ||||||
"my_custom_measurement_3": {"value": 456}, | ||||||
"my_custom_measurement_2": {"value": 789} | ||||||
} | ||||||
} | ||||||
"#; | ||||||
let mut event = Annotated::<Event>::from_json(json).unwrap().0.unwrap(); | ||||||
|
||||||
let config: MeasurementsConfig = serde_json::from_value(json!({ | ||||||
"knownMeasurements": [ | ||||||
"frames_slow" | ||||||
], | ||||||
"maxCustomMeasurements": 2, | ||||||
"stray_key": "zzz" | ||||||
})) | ||||||
.unwrap(); | ||||||
|
||||||
normalize_measurements(&mut event, Some(&config)); | ||||||
|
||||||
// Only two custom measurements are retained, in alphabetic order (1 and 2) | ||||||
insta::assert_ron_snapshot!(SerializableAnnotated(&Annotated::new(event)), {}, @r###" | ||||||
{ | ||||||
"type": "transaction", | ||||||
"timestamp": 1619420405.0, | ||||||
"start_timestamp": 1619420400.0, | ||||||
"measurements": { | ||||||
"frames_slow": { | ||||||
"value": 1.0, | ||||||
}, | ||||||
"my_custom_measurement_1": { | ||||||
"value": 123.0, | ||||||
}, | ||||||
"my_custom_measurement_2": { | ||||||
"value": 789.0, | ||||||
}, | ||||||
}, | ||||||
} | ||||||
"###); | ||||||
} | ||||||
|
||||||
#[test] | ||||||
fn test_light_normalization_is_idempotent() { | ||||||
// get an event, light normalize it. the result of that must be the same as light normalizing it once more | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same suggestion from sentry's PR: I'd rename this to something similar to
builtin_measurements
in order to have one name per concept (and we use "built-in" quite extensively in general).