From e2d5dee2f6c27cb89c69ab8aa90ad0faff222d65 Mon Sep 17 00:00:00 2001
From: Antoine Toulme
Date: Thu, 7 Apr 2022 23:55:43 -0700
Subject: [PATCH 1/5] Add ability to sample logs

---
 CHANGELOG.md                                      |   1 +
 .../probabilisticsamplerprocessor/README.md       |  36 +++-
 .../probabilisticsamplerprocessor/config.go       |  56 ++++++-
 .../config_test.go                                |  23 ++-
 .../probabilisticsamplerprocessor/factory.go      |  13 +-
 .../factory_test.go                               |   8 +
 .../logprobabilisticsampler.go                    |  95 +++++++++++
 .../logprobabilisticsampler_test.go               | 155 ++++++++++++++++++
 .../testdata/config.yaml                          |  29 ++++
 .../testdata/invalid.yaml                         |  24 +++
 10 files changed, 433 insertions(+), 7 deletions(-)
 create mode 100644 processor/probabilisticsamplerprocessor/logprobabilisticsampler.go
 create mode 100644 processor/probabilisticsamplerprocessor/logprobabilisticsampler_test.go
 create mode 100644 processor/probabilisticsamplerprocessor/testdata/invalid.yaml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f1e29b1ea7c..6fdba5d1e48e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@
 - `prometheusremotewriteexporter`: Translate resource attributes to the target info metric (#8493)
 - `podmanreceiver`: Add API timeout configuration option (#9014)
 - `cmd/mdatagen`: Add `sem_conv_version` field to metadata.yaml that is used to set metrics SchemaURL (#9010)
+- `probabilistic_sampler`: Add ability to sample logs (#9118)
 
 ### 🛑 Breaking changes 🛑
 
diff --git a/processor/probabilisticsamplerprocessor/README.md b/processor/probabilisticsamplerprocessor/README.md
index 31e593a1fb74..8f046af95e7b 100644
--- a/processor/probabilisticsamplerprocessor/README.md
+++ b/processor/probabilisticsamplerprocessor/README.md
@@ -1,8 +1,39 @@
 # Probabilistic Sampling Processor
 
-Supported pipeline types: traces
+Supported pipeline types: traces, logs
 
-The probabilistic sampler supports two types of sampling:
+The probabilistic sampler supports sampling logs by associating sampling rates with log severity levels.
+
+A default sampling rate is mandatory. Additional sampling rates can then be associated with individual log severity levels.
+Any log record whose severity is equal to or higher than a configured severity level adopts that level's sampling rate.
+
+The probabilistic sampler may optionally use a `hash_seed` to compute the hash of a log record.
+This sampler samples based on hash values computed from log records. In order for
+log record hashing to work, all collectors for a given tier (e.g. behind the same load balancer)
+must have the same `hash_seed`. It is also possible to leverage a different `hash_seed` at
+different collector tiers to support additional sampling requirements. Please refer to
+[config.go](./config.go) for the config spec.
+
+The following configuration options can be modified:
+- `hash_seed` (no default): An integer used to compute the hash algorithm. Note that all collectors for a given tier (e.g. behind the same load balancer) should have the same hash_seed.
+- `sampling_percentage` (default = 0): Percentage at which logs are sampled; >= 100 samples all logs
+- `severity/severity_level`: `SeverityText` associated with a [severity level](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md#displaying-severity)
+- `severity/sampling_percentage` (default = 0): Percentage at which logs are sampled when the record's severity is equal to or higher than the configured `severity_level`; >= 100 samples all logs
+Examples:
+
+```yaml
+processors:
+  probabilistic_sampler:
+    hash_seed: 22
+    sampling_percentage: 15
+    severity:
+      - severity_level: error
+        sampling_percentage: 100
+      - severity_level: warn
+        sampling_percentage: 75
+```
+
+The probabilistic sampler supports two types of sampling for traces:
 
 1. `sampling.priority` [semantic
 convention](https://github.com/opentracing/specification/blob/master/semantic_conventions.md#span-tags-table)
 as defined by OpenTracing
 2. Trace ID hashing
@@ -31,3 +62,4 @@ processors:
 
 Refer to [config.yaml](./testdata/config.yaml) for detailed
 examples on using the processor.
+
diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go
index d7dd5dada8c0..226818ee8eb1 100644
--- a/processor/probabilisticsamplerprocessor/config.go
+++ b/processor/probabilisticsamplerprocessor/config.go
@@ -15,26 +15,76 @@ package probabilisticsamplerprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/probabilisticsamplerprocessor"
 
 import (
+	"fmt"
+
 	"go.opentelemetry.io/collector/config"
+	"go.opentelemetry.io/collector/model/pdata"
 )
 
-// Config has the configuration guiding the trace sampler processor.
+var severityTextToNum = map[string]pdata.SeverityNumber{
+	"default": pdata.SeverityNumberUNDEFINED,
+	"trace":   pdata.SeverityNumberTRACE,
+	"trace2":  pdata.SeverityNumberTRACE2,
+	"trace3":  pdata.SeverityNumberTRACE3,
+	"trace4":  pdata.SeverityNumberTRACE4,
+	"debug":   pdata.SeverityNumberDEBUG,
+	"debug2":  pdata.SeverityNumberDEBUG2,
+	"debug3":  pdata.SeverityNumberDEBUG3,
+	"debug4":  pdata.SeverityNumberDEBUG4,
+	"info":    pdata.SeverityNumberINFO,
+	"info2":   pdata.SeverityNumberINFO2,
+	"info3":   pdata.SeverityNumberINFO3,
+	"info4":   pdata.SeverityNumberINFO4,
+	"warn":    pdata.SeverityNumberWARN,
+	"warn2":   pdata.SeverityNumberWARN2,
+	"warn3":   pdata.SeverityNumberWARN3,
+	"warn4":   pdata.SeverityNumberWARN4,
+	"error":   pdata.SeverityNumberERROR,
+	"error2":  pdata.SeverityNumberERROR2,
+	"error3":  pdata.SeverityNumberERROR3,
+	"error4":  pdata.SeverityNumberERROR4,
+	"fatal":   pdata.SeverityNumberFATAL,
+	"fatal2":  pdata.SeverityNumberFATAL2,
+	"fatal3":  pdata.SeverityNumberFATAL3,
+	"fatal4":  pdata.SeverityNumberFATAL4,
+}
+
+type severityPair struct {
+	Level              string  `mapstructure:"severity_level"`
+	SamplingPercentage float32 `mapstructure:"sampling_percentage"`
+}
+
+// Config has the configuration guiding the sampler processor.
 type Config struct {
 	config.ProcessorSettings `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct
 
-	// SamplingPercentage is the percentage rate at which traces are going to be sampled. Defaults to zero, i.e.: no sample.
-	// Values greater or equal 100 are treated as "sample all traces".
+	// SamplingPercentage is the percentage rate at which traces or logs are going to be sampled. Defaults to zero, i.e.: no sample.
+	// Values greater or equal 100 are treated as "sample all traces/logs".
 	SamplingPercentage float32 `mapstructure:"sampling_percentage"`
 
 	// HashSeed allows one to configure the hashing seed. This is important in scenarios where multiple layers of collectors
 	// have different sampling rates: if they use the same seed all passing one layer may pass the other even if they have
 	// different sampling rates, configuring different seeds avoids that.
 	HashSeed uint32 `mapstructure:"hash_seed"`
+
+	// Severity is an array of severity and sampling percentage pairs allocating a specific sampling percentage
+	// to a given severity level.
+	Severity []severityPair `mapstructure:"severity"`
 }
 
 var _ config.Processor = (*Config)(nil)
 
 // Validate checks if the processor configuration is valid
 func (cfg *Config) Validate() error {
+	keys := map[string]bool{}
+	for _, pair := range cfg.Severity {
+		if _, ok := severityTextToNum[pair.Level]; !ok {
+			return fmt.Errorf("unrecognized severity level: %s", pair.Level)
+		}
+		if keys[pair.Level] {
+			return fmt.Errorf("severity already used: %s", pair.Level)
+		}
+		keys[pair.Level] = true
+	}
 	return nil
 }
diff --git a/processor/probabilisticsamplerprocessor/config_test.go b/processor/probabilisticsamplerprocessor/config_test.go
index 09e7eeedf934..96f7c4a1344d 100644
--- a/processor/probabilisticsamplerprocessor/config_test.go
+++ b/processor/probabilisticsamplerprocessor/config_test.go
@@ -42,7 +42,17 @@ func TestLoadConfig(t *testing.T) {
 			SamplingPercentage: 15.3,
 			HashSeed:           22,
 		})
-
+	p1 := cfg.Processors[config.NewComponentIDWithName(typeStr, "logs")]
+	assert.Equal(t,
+		&Config{
+			ProcessorSettings:  config.NewProcessorSettings(config.NewComponentIDWithName(typeStr, "logs")),
+			SamplingPercentage: 15.3,
+			HashSeed:           22,
+			Severity: []severityPair{
+				{Level: "error", SamplingPercentage: 100},
+				{Level: "warn", SamplingPercentage: 80},
+			},
+		}, p1)
 }
 
 func TestLoadConfigEmpty(t *testing.T) {
@@ -59,3 +69,14 @@ func TestLoadConfigEmpty(t *testing.T) {
 	p0 := cfg.Processors[config.NewComponentID(typeStr)]
 	assert.Equal(t, p0, createDefaultConfig())
 }
+
+func TestLoadInvalidConfig(t *testing.T) {
+	factories, err := componenttest.NopFactories()
+	require.NoError(t, err)
+
+	factory := NewFactory()
+	factories.Processors[typeStr] = factory
+
+	_, err = servicetest.LoadConfigAndValidate(filepath.Join("testdata", "invalid.yaml"), factories)
+	require.ErrorContains(t, err, "severity already used: error")
+}
diff --git a/processor/probabilisticsamplerprocessor/factory.go b/processor/probabilisticsamplerprocessor/factory.go
index 71f16f192c03..2109739e18ec 100644
--- a/processor/probabilisticsamplerprocessor/factory.go
+++ b/processor/probabilisticsamplerprocessor/factory.go
@@ -32,7 +32,8 @@ func NewFactory() component.ProcessorFactory {
 	return component.NewProcessorFactory(
 		typeStr,
 		createDefaultConfig,
-		component.WithTracesProcessor(createTracesProcessor))
+		component.WithTracesProcessor(createTracesProcessor),
+		component.WithLogsProcessor(createLogsProcessor))
 }
 
 func createDefaultConfig() config.Processor {
@@ -50,3 +51,13 @@ func createTracesProcessor(
 ) (component.TracesProcessor, error) {
 	return newTracesProcessor(nextConsumer, cfg.(*Config))
 }
+
+// createLogsProcessor creates a log processor based on this config.
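+// The same Config drives both the traces and the logs processors.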
+func createLogsProcessor( + _ context.Context, + _ component.ProcessorCreateSettings, + cfg config.Processor, + nextConsumer consumer.Logs, +) (component.LogsProcessor, error) { + return newLogsProcessor(nextConsumer, cfg.(*Config)) +} diff --git a/processor/probabilisticsamplerprocessor/factory_test.go b/processor/probabilisticsamplerprocessor/factory_test.go index e5e0a9f154f0..72fb15e454d8 100644 --- a/processor/probabilisticsamplerprocessor/factory_test.go +++ b/processor/probabilisticsamplerprocessor/factory_test.go @@ -37,3 +37,11 @@ func TestCreateProcessor(t *testing.T) { assert.NotNil(t, tp) assert.NoError(t, err, "cannot create trace processor") } + +func TestCreateProcessorLogs(t *testing.T) { + cfg := createDefaultConfig() + set := componenttest.NewNopProcessorCreateSettings() + tp, err := createLogsProcessor(context.Background(), set, cfg, consumertest.NewNop()) + assert.NotNil(t, tp) + assert.NoError(t, err, "cannot create logs processor") +} diff --git a/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go b/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go new file mode 100644 index 000000000000..1fcb9a5e546c --- /dev/null +++ b/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go @@ -0,0 +1,95 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package probabilisticsamplerprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/probabilisticsamplerprocessor" + +import ( + "context" + "sort" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/model/pdata" + "go.opentelemetry.io/collector/processor/processorhelper" +) + +type severitySamplingRate struct { + level pdata.SeverityNumber + scaledSamplingRate uint32 +} + +type logsamplerprocessor struct { + samplingRates []*severitySamplingRate + hashSeed uint32 +} + +// newLogsProcessor returns a processor.LogsProcessor that will perform head sampling according to the given +// configuration. 
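+// Severity-specific rates are kept sorted by ascending severity number so that
+// processing selects the highest configured level at or below a record's severity.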
+func newLogsProcessor(nextConsumer consumer.Logs, cfg *Config) (component.LogsProcessor, error) { + + severitySamplingRates := []*severitySamplingRate{ + {level: pdata.SeverityNumberUNDEFINED, scaledSamplingRate: uint32(cfg.SamplingPercentage * percentageScaleFactor)}, + } + sort.SliceStable(cfg.Severity, func(i, j int) bool { + return severityTextToNum[cfg.Severity[i].Level] < severityTextToNum[cfg.Severity[j].Level] + }) + for _, pair := range cfg.Severity { + newRate := &severitySamplingRate{level: severityTextToNum[pair.Level], + scaledSamplingRate: uint32(pair.SamplingPercentage * percentageScaleFactor), + } + severitySamplingRates = append(severitySamplingRates, newRate) + } + + lsp := &logsamplerprocessor{ + samplingRates: severitySamplingRates, + hashSeed: cfg.HashSeed, + } + + return processorhelper.NewLogsProcessor( + cfg, + nextConsumer, + lsp.processLogs, + processorhelper.WithCapabilities(consumer.Capabilities{MutatesData: true})) +} + +func (lsp *logsamplerprocessor) processLogs(_ context.Context, ld pdata.Logs) (pdata.Logs, error) { + ld.ResourceLogs().RemoveIf(func(rl pdata.ResourceLogs) bool { + rl.ScopeLogs().RemoveIf(func(ill pdata.ScopeLogs) bool { + ill.LogRecords().RemoveIf(func(l pdata.LogRecord) bool { + + // find the correct severity sampling level. + var selectedSamplingRate *severitySamplingRate + for _, ssr := range lsp.samplingRates { + if ssr.level > l.SeverityNumber() { + break + } + selectedSamplingRate = ssr + } + + // Create an id for the log record by combining the timestamp and severity text. + lidBytes := []byte(l.Timestamp().String() + l.SeverityText()) + sampled := hash(lidBytes[:], lsp.hashSeed)&bitMaskHashBuckets < selectedSamplingRate.scaledSamplingRate + return !sampled + }) + // Filter out empty ScopeLogs + return ill.LogRecords().Len() == 0 + }) + // Filter out empty ResourceLogs + return rl.ScopeLogs().Len() == 0 + }) + if ld.ResourceLogs().Len() == 0 { + return ld, processorhelper.ErrSkipProcessingData + } + return ld, nil +} diff --git a/processor/probabilisticsamplerprocessor/logprobabilisticsampler_test.go b/processor/probabilisticsamplerprocessor/logprobabilisticsampler_test.go new file mode 100644 index 000000000000..2c340560bbd2 --- /dev/null +++ b/processor/probabilisticsamplerprocessor/logprobabilisticsampler_test.go @@ -0,0 +1,155 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package probabilisticsamplerprocessor + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/config" + "go.opentelemetry.io/collector/consumer" + "go.opentelemetry.io/collector/consumer/consumertest" + "go.opentelemetry.io/collector/model/pdata" +) + +func TestNewLogsProcessor(t *testing.T) { + tests := []struct { + name string + nextConsumer consumer.Logs + cfg *Config + wantErr bool + }{ + { + name: "nil_nextConsumer", + cfg: &Config{ + ProcessorSettings: config.NewProcessorSettings(config.NewComponentID(typeStr)), + SamplingPercentage: 15.5, + }, + wantErr: true, + }, + { + name: "happy_path", + nextConsumer: consumertest.NewNop(), + cfg: &Config{ + ProcessorSettings: config.NewProcessorSettings(config.NewComponentID(typeStr)), + SamplingPercentage: 15.5, + }, + }, + { + name: "happy_path_hash_seed", + nextConsumer: consumertest.NewNop(), + cfg: &Config{ + ProcessorSettings: config.NewProcessorSettings(config.NewComponentID(typeStr)), + SamplingPercentage: 13.33, + HashSeed: 4321, + }, + }, + { + name: "with_severity", + nextConsumer: consumertest.NewNop(), + cfg: &Config{ + ProcessorSettings: config.NewProcessorSettings(config.NewComponentID(typeStr)), + SamplingPercentage: 13.33, + HashSeed: 4321, + Severity: []severityPair{ + {"error", 90}, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := newLogsProcessor(tt.nextConsumer, tt.cfg) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.NotNil(t, got) + } + }) + } +} + +func TestLogsSampling(t *testing.T) { + tests := []struct { + name string + cfg *Config + received int + }{ + { + name: "happy_path", + cfg: &Config{ + ProcessorSettings: config.NewProcessorSettings(config.NewComponentID(typeStr)), + SamplingPercentage: 100, + }, + received: 2, + }, + { + name: "nothing", + cfg: &Config{ + ProcessorSettings: config.NewProcessorSettings(config.NewComponentID(typeStr)), + SamplingPercentage: 0, + }, + received: 0, + }, + { + name: "half", + cfg: &Config{ + ProcessorSettings: config.NewProcessorSettings(config.NewComponentID(typeStr)), + SamplingPercentage: 49, + }, + received: 1, + }, + { + name: "nothing_except_errors", + cfg: &Config{ + ProcessorSettings: config.NewProcessorSettings(config.NewComponentID(typeStr)), + SamplingPercentage: 0, + Severity: []severityPair{ + {"error", 100}, + }, + }, + received: 1, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sink := new(consumertest.LogsSink) + processor, err := newLogsProcessor(sink, tt.cfg) + require.NoError(t, err) + logs := pdata.NewLogs() + lr := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + for i := 0; i < 5; i++ { + first := lr.AppendEmpty() + first.SetTimestamp(pdata.Timestamp(time.Unix(1649400860, 0).Unix())) + first.SetSeverityNumber(pdata.SeverityNumberDEBUG) + second := lr.AppendEmpty() + second.SetTimestamp(pdata.Timestamp(time.Unix(12345555432, 0).Unix())) + second.SetSeverityNumber(pdata.SeverityNumberERROR) + } + err = processor.ConsumeLogs(context.Background(), logs) + require.NoError(t, err) + sunk := sink.AllLogs() + numReceived := 0 + if len(sunk) > 0 && sunk[0].ResourceLogs().Len() > 0 { + numReceived = sunk[0].ResourceLogs().At(0).ScopeLogs().At(0).LogRecords().Len() + } + assert.Equal(t, tt.received*5, numReceived) + }) + } +} diff --git a/processor/probabilisticsamplerprocessor/testdata/config.yaml 
b/processor/probabilisticsamplerprocessor/testdata/config.yaml index a421c8258bbf..5e8f19233d55 100644 --- a/processor/probabilisticsamplerprocessor/testdata/config.yaml +++ b/processor/probabilisticsamplerprocessor/testdata/config.yaml @@ -24,6 +24,31 @@ processors: # intended. hash_seed: 22 + probabilistic_sampler/logs: + # the percentage rate at which logs are going to be sampled. Defaults to + # zero, i.e.: no sample. Values greater or equal 100 are treated as + # "sample all logs". + sampling_percentage: 15.3 + # hash_seed allows one to configure the hashing seed. This is important in + # scenarios where multiple layers of collectors are used to achieve the + # desired sampling rate, eg.: 10% on first layer and 10% on the + # second, resulting in an overall sampling rate of 1% (10% x 10%). + # If all layers use the same seed, all data passing one layer will also pass + # the next one, independent of the configured sampling rate. Having different + # seeds at different layers ensures that sampling rate in each layer work as + # intended. + hash_seed: 22 + # Severity filters override the default sampling_percentage sampling rate. + severity: + # Override the default sampling_percentage for all log records of severity of error or higher + # to keep all logs. + - sampling_percentage: 100 + severity_level: error + # Override the default sampling_percentage for all log records of severity of warn or higher + # to keep 80% of logs. Note this doesn't override the error logs sampling rate. + - sampling_percentage: 80 + severity_level: warn + exporters: nop: @@ -33,3 +58,7 @@ service: receivers: [nop] processors: [probabilistic_sampler] exporters: [nop] + logs: + receivers: [ nop ] + processors: [ probabilistic_sampler/logs ] + exporters: [ nop ] diff --git a/processor/probabilisticsamplerprocessor/testdata/invalid.yaml b/processor/probabilisticsamplerprocessor/testdata/invalid.yaml new file mode 100644 index 000000000000..f252ae919cdb --- /dev/null +++ b/processor/probabilisticsamplerprocessor/testdata/invalid.yaml @@ -0,0 +1,24 @@ +receivers: + nop: + +processors: + + probabilistic_sampler/logs: + sampling_percentage: 15.3 + hash_seed: 22 + severity: + - sampling_percentage: 100 + severity_level: error + # Duplicate severity level sampling rate! 
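+      # Validation is expected to fail with "severity already used: error".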
+      - sampling_percentage: 80
+        severity_level: error
+
+exporters:
+  nop:
+
+service:
+  pipelines:
+    logs:
+      receivers: [ nop ]
+      processors: [ probabilistic_sampler/logs ]
+      exporters: [ nop ]

From e64b06aaa097c0a61d237e724e7d8df69941c329 Mon Sep 17 00:00:00 2001
From: Antoine Toulme
Date: Fri, 8 Apr 2022 16:33:29 -0700
Subject: [PATCH 2/5] code review: validator for negative rates

---
 processor/probabilisticsamplerprocessor/config.go |  6 ++++++
 .../probabilisticsamplerprocessor/config_test.go  | 14 ++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go
index 226818ee8eb1..845d705d6b95 100644
--- a/processor/probabilisticsamplerprocessor/config.go
+++ b/processor/probabilisticsamplerprocessor/config.go
@@ -76,11 +76,17 @@ var _ config.Processor = (*Config)(nil)
 
 // Validate checks if the processor configuration is valid
 func (cfg *Config) Validate() error {
+	if cfg.SamplingPercentage < 0 {
+		return fmt.Errorf("negative sampling rate: %.2f", cfg.SamplingPercentage)
+	}
 	keys := map[string]bool{}
 	for _, pair := range cfg.Severity {
 		if _, ok := severityTextToNum[pair.Level]; !ok {
 			return fmt.Errorf("unrecognized severity level: %s", pair.Level)
 		}
+		if pair.SamplingPercentage < 0 {
+			return fmt.Errorf("negative sampling rate: %.2f [%s]", pair.SamplingPercentage, pair.Level)
+		}
 		if keys[pair.Level] {
 			return fmt.Errorf("severity already used: %s", pair.Level)
 		}
diff --git a/processor/probabilisticsamplerprocessor/config_test.go b/processor/probabilisticsamplerprocessor/config_test.go
index 96f7c4a1344d..0300b3a7e37a 100644
--- a/processor/probabilisticsamplerprocessor/config_test.go
+++ b/processor/probabilisticsamplerprocessor/config_test.go
@@ -80,3 +80,17 @@ func TestLoadInvalidConfig(t *testing.T) {
 	_, err = servicetest.LoadConfigAndValidate(filepath.Join("testdata", "invalid.yaml"), factories)
 	require.ErrorContains(t, err, "severity already used: error")
 }
+
+func TestNegativeSamplingRate(t *testing.T) {
+	cfg := createDefaultConfig()
+	cfg.(*Config).SamplingPercentage = -5
+	err := cfg.Validate()
+	require.ErrorContains(t, err, "negative sampling rate: -5.00")
+
+	cfg = createDefaultConfig()
+	cfg.(*Config).Severity = []severityPair{
+		{Level: "error", SamplingPercentage: -4.344},
+	}
+	err = cfg.Validate()
+	require.ErrorContains(t, err, "negative sampling rate: -4.34 [error]")
+}

From fefae2bb3afb306c28ba1a867bcdbad133daae26 Mon Sep 17 00:00:00 2001
From: Antoine Toulme
Date: Sat, 9 Apr 2022 15:38:14 -0700
Subject: [PATCH 3/5] Redo with a different approach, using attributes

---
 .../probabilisticsamplerprocessor/README.md   | 70 +++++++------
 .../probabilisticsamplerprocessor/config.go   | 59 ++---------
 .../config_test.go                            | 20 +---
 .../logprobabilisticsampler.go                | 58 +++++------
 .../logprobabilisticsampler_test.go           | 97 +++++++++++++------
 .../testdata/config.yaml                      | 16 ++-
 .../testdata/invalid.yaml                     |  8 +-
 7 files changed, 153 insertions(+), 175 deletions(-)

diff --git a/processor/probabilisticsamplerprocessor/README.md b/processor/probabilisticsamplerprocessor/README.md
index 8f046af95e7b..d84490d5d8b8 100644
--- a/processor/probabilisticsamplerprocessor/README.md
+++ b/processor/probabilisticsamplerprocessor/README.md
@@ -2,64 +2,78 @@
 
 Supported pipeline types: traces, logs
 
-The probabilistic sampler supports sampling logs by associating sampling rates with log severity levels.
+The probabilistic sampler supports two types of sampling for traces:
 
-A default sampling rate is mandatory. Additional sampling rates can then be associated with individual log severity levels.
-Any log record whose severity is equal to or higher than a configured severity level adopts that level's sampling rate.
+1. `sampling.priority` [semantic
+convention](https://github.com/opentracing/specification/blob/master/semantic_conventions.md#span-tags-table)
+as defined by OpenTracing
+2. Trace ID hashing
 
-The probabilistic sampler may optionally use a `hash_seed` to compute the hash of a log record.
-This sampler samples based on hash values computed from log records. In order for
-log record hashing to work, all collectors for a given tier (e.g. behind the same load balancer)
+The `sampling.priority` semantic convention takes priority over trace ID hashing. As the name
+implies, trace ID hashing samples based on hash values determined by trace IDs. In order for
+trace ID hashing to work, all collectors for a given tier (e.g. behind the same load balancer)
 must have the same `hash_seed`. It is also possible to leverage a different `hash_seed` at
 different collector tiers to support additional sampling requirements. Please refer to
 [config.go](./config.go) for the config spec.
 
 The following configuration options can be modified:
 - `hash_seed` (no default): An integer used to compute the hash algorithm. Note that all collectors for a given tier (e.g. behind the same load balancer) should have the same hash_seed.
-- `sampling_percentage` (default = 0): Percentage at which logs are sampled; >= 100 samples all logs
-- `severity/severity_level`: `SeverityText` associated with a [severity level](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md#displaying-severity)
-- `severity/sampling_percentage` (default = 0): Percentage at which logs are sampled when the record's severity is equal to or higher than the configured `severity_level`; >= 100 samples all logs
+- `sampling_percentage` (default = 0): Percentage at which traces are sampled; >= 100 samples all traces
+
 Examples:
 
 ```yaml
 processors:
   probabilistic_sampler:
     hash_seed: 22
-    sampling_percentage: 15
-    severity:
-      - severity_level: error
-        sampling_percentage: 100
-      - severity_level: warn
-        sampling_percentage: 75
+    sampling_percentage: 15.3
 ```
 
+The probabilistic sampler supports sampling logs according to their trace ID, or by a specific log record attribute.
+
+The probabilistic sampler may optionally use a `hash_seed` to compute the hash of a log record.
+This sampler samples based on hash values computed from log records. In order for
+log record hashing to work, all collectors for a given tier (e.g. behind the same load balancer)
+must have the same `hash_seed`. It is also possible to leverage a different `hash_seed` at
+different collector tiers to support additional sampling requirements. Please refer to
+[config.go](./config.go) for the config spec.
+
+The following configuration options can be modified:
+- `hash_seed` (no default, optional): An integer used to compute the hash algorithm. Note that all collectors for a given tier (e.g. behind the same load balancer) should have the same hash_seed.
+- `sampling_percentage` (default = 0, required): Percentage at which logs are sampled; >= 100 samples all logs
+- `trace_id_sampling` (default = true, optional): Whether to use the log record's trace ID when sampling the log record.
+- `sampling_source` (default = null, optional): The name of a log record attribute used for sampling purposes, such as a unique log record ID. The attribute value is only used if the trace ID is absent or if `trace_id_sampling` is set to `false`.
+- `sampling_priority` (default = null, optional): The name of a log record attribute used to set a different sampling priority from the `sampling_percentage` setting. 0 means to never sample the log record, and >= 100 means to always sample the log record.
+
+Examples:
+
+Sample 15% of the logs:
+
+```yaml
+processors:
+  probabilistic_sampler:
+    sampling_percentage: 15
+```
+
+Sample logs according to their `logID` attribute:
+
+```yaml
+processors:
+  probabilistic_sampler:
+    sampling_percentage: 15
+    trace_id_sampling: false
+    sampling_source: logID
+```
+
+Sample logs according to the attribute `priority`:
+
+```yaml
+processors:
+  probabilistic_sampler:
+    sampling_percentage: 15
+    sampling_priority: priority
+```
+
 
 Refer to [config.yaml](./testdata/config.yaml) for detailed
 examples on using the processor.
diff --git a/processor/probabilisticsamplerprocessor/config.go b/processor/probabilisticsamplerprocessor/config.go
index 845d705d6b95..3e5d586361bc 100644
--- a/processor/probabilisticsamplerprocessor/config.go
+++ b/processor/probabilisticsamplerprocessor/config.go
@@ -18,42 +18,8 @@ import (
 	"fmt"
 
 	"go.opentelemetry.io/collector/config"
-	"go.opentelemetry.io/collector/model/pdata"
 )
 
-var severityTextToNum = map[string]pdata.SeverityNumber{
-	"default": pdata.SeverityNumberUNDEFINED,
-	"trace":   pdata.SeverityNumberTRACE,
-	"trace2":  pdata.SeverityNumberTRACE2,
-	"trace3":  pdata.SeverityNumberTRACE3,
-	"trace4":  pdata.SeverityNumberTRACE4,
-	"debug":   pdata.SeverityNumberDEBUG,
-	"debug2":  pdata.SeverityNumberDEBUG2,
-	"debug3":  pdata.SeverityNumberDEBUG3,
-	"debug4":  pdata.SeverityNumberDEBUG4,
-	"info":    pdata.SeverityNumberINFO,
-	"info2":   pdata.SeverityNumberINFO2,
-	"info3":   pdata.SeverityNumberINFO3,
-	"info4":   pdata.SeverityNumberINFO4,
-	"warn":    pdata.SeverityNumberWARN,
-	"warn2":   pdata.SeverityNumberWARN2,
-	"warn3":   pdata.SeverityNumberWARN3,
-	"warn4":   pdata.SeverityNumberWARN4,
-	"error":   pdata.SeverityNumberERROR,
-	"error2":  pdata.SeverityNumberERROR2,
-	"error3":  pdata.SeverityNumberERROR3,
-	"error4":  pdata.SeverityNumberERROR4,
-	"fatal":   pdata.SeverityNumberFATAL,
-	"fatal2":  pdata.SeverityNumberFATAL2,
-	"fatal3":  pdata.SeverityNumberFATAL3,
-	"fatal4":  pdata.SeverityNumberFATAL4,
-}
-
-type severityPair struct {
-	Level              string  `mapstructure:"severity_level"`
-	SamplingPercentage float32 `mapstructure:"sampling_percentage"`
-}
-
 // Config has the configuration guiding the sampler processor.
 type Config struct {
 	config.ProcessorSettings `mapstructure:",squash"` // squash ensures fields are correctly decoded in embedded struct
 
@@ -67,9 +33,15 @@ type Config struct {
 	// different sampling rates, configuring different seeds avoids that.
 	HashSeed uint32 `mapstructure:"hash_seed"`
 
-	// Severity is an array of severity and sampling percentage pairs allocating a specific sampling percentage
-	// to a given severity level.
-	Severity []severityPair `mapstructure:"severity"`
+	// TraceIDEnabled (logs only) controls whether the log record's trace ID is used to sample the record,
+	// rather than a specific log record attribute. By default, this option is true.
+	TraceIDEnabled *bool `mapstructure:"trace_id_sampling"`
+	// SamplingSource (logs only) allows using a log record attribute, designated by the `sampling_source` key,
+	// to compute the sampling hash of the log record instead of the trace ID, if the trace ID is absent or trace ID sampling is disabled.
+	SamplingSource string `mapstructure:"sampling_source"`
+	// SamplingPriority (logs only) allows using a log record attribute, designated by the `sampling_priority` key,
+	// as the sampling priority of the log record.
+	SamplingPriority string `mapstructure:"sampling_priority"`
 }
 
 var _ config.Processor = (*Config)(nil)
@@ -79,18 +51,5 @@ func (cfg *Config) Validate() error {
 	if cfg.SamplingPercentage < 0 {
 		return fmt.Errorf("negative sampling rate: %.2f", cfg.SamplingPercentage)
 	}
-	keys := map[string]bool{}
-	for _, pair := range cfg.Severity {
-		if _, ok := severityTextToNum[pair.Level]; !ok {
-			return fmt.Errorf("unrecognized severity level: %s", pair.Level)
-		}
-		if pair.SamplingPercentage < 0 {
-			return fmt.Errorf("negative sampling rate: %.2f [%s]", pair.SamplingPercentage, pair.Level)
-		}
-		if keys[pair.Level] {
-			return fmt.Errorf("severity already used: %s", pair.Level)
-		}
-		keys[pair.Level] = true
-	}
 	return nil
 }
diff --git a/processor/probabilisticsamplerprocessor/config_test.go b/processor/probabilisticsamplerprocessor/config_test.go
index 0300b3a7e37a..1727a6d536ac 100644
--- a/processor/probabilisticsamplerprocessor/config_test.go
+++ b/processor/probabilisticsamplerprocessor/config_test.go
@@ -48,10 +48,6 @@ func TestLoadConfig(t *testing.T) {
 			ProcessorSettings:  config.NewProcessorSettings(config.NewComponentIDWithName(typeStr, "logs")),
 			SamplingPercentage: 15.3,
 			HashSeed:           22,
-			Severity: []severityPair{
-				{Level: "error", SamplingPercentage: 100},
-				{Level: "warn", SamplingPercentage: 80},
-			},
 		}, p1)
 }
 
@@ -78,19 +74,5 @@ func TestLoadInvalidConfig(t *testing.T) {
 	factories.Processors[typeStr] = factory
 
 	_, err = servicetest.LoadConfigAndValidate(filepath.Join("testdata", "invalid.yaml"), factories)
-	require.ErrorContains(t, err, "severity already used: error")
-}
-
-func TestNegativeSamplingRate(t *testing.T) {
-	cfg := createDefaultConfig()
-	cfg.(*Config).SamplingPercentage = -5
-	err := cfg.Validate()
-	require.ErrorContains(t, err, "negative sampling rate: -5.00")
-
-	cfg = createDefaultConfig()
-	cfg.(*Config).Severity = []severityPair{
-		{Level: "error", SamplingPercentage: -4.344},
-	}
-	err = cfg.Validate()
-	require.ErrorContains(t, err, "negative sampling rate: -4.34 [error]")
+	require.ErrorContains(t, err, "negative sampling rate: -15.30")
 }
diff --git a/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go b/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go
index 1fcb9a5e546c..b4f609c84232 100644
--- a/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go
+++ b/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go
@@ -16,7 +16,6 @@ package probabilisticsamplerprocessor // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/probabilisticsamplerprocessor"
 
 import (
 	"context"
-	"sort"
 
 	"go.opentelemetry.io/collector/component"
 	"go.opentelemetry.io/collector/consumer"
@@ -24,36 +23,24 @@ import (
 	"go.opentelemetry.io/collector/processor/processorhelper"
 )
 
-type severitySamplingRate struct {
-	level              pdata.SeverityNumber
-	scaledSamplingRate uint32
-}
-
 type logsamplerprocessor struct {
-	samplingRates []*severitySamplingRate
-	hashSeed      uint32
+	scaledSamplingRate uint32
+	hashSeed           uint32
+	traceIdEnabled     bool
+	samplingSource     string
+	samplingPriority   string
 }
 
 // newLogsProcessor returns a processor.LogsProcessor that will perform head sampling according to the given
 // configuration.
-// Severity-specific rates are kept sorted by ascending severity number so that
-// processing selects the highest configured level at or below a record's severity.
 func newLogsProcessor(nextConsumer consumer.Logs, cfg *Config) (component.LogsProcessor, error) {
 
-	severitySamplingRates := []*severitySamplingRate{
-		{level: pdata.SeverityNumberUNDEFINED, scaledSamplingRate: uint32(cfg.SamplingPercentage * percentageScaleFactor)},
-	}
-	sort.SliceStable(cfg.Severity, func(i, j int) bool {
-		return severityTextToNum[cfg.Severity[i].Level] < severityTextToNum[cfg.Severity[j].Level]
-	})
-	for _, pair := range cfg.Severity {
-		newRate := &severitySamplingRate{level: severityTextToNum[pair.Level],
-			scaledSamplingRate: uint32(pair.SamplingPercentage * percentageScaleFactor),
-		}
-		severitySamplingRates = append(severitySamplingRates, newRate)
-	}
-
 	lsp := &logsamplerprocessor{
-		samplingRates: severitySamplingRates,
-		hashSeed:      cfg.HashSeed,
+		scaledSamplingRate: uint32(cfg.SamplingPercentage * percentageScaleFactor),
+		hashSeed:           cfg.HashSeed,
+		traceIdEnabled:     cfg.TraceIDEnabled == nil || *cfg.TraceIDEnabled,
+		samplingPriority:   cfg.SamplingPriority,
+		samplingSource:     cfg.SamplingSource,
 	}
 
 	return processorhelper.NewLogsProcessor(
@@ -68,18 +55,25 @@ func (lsp *logsamplerprocessor) processLogs(_ context.Context, ld pdata.Logs) (pdata.Logs, error) {
 		rl.ScopeLogs().RemoveIf(func(ill pdata.ScopeLogs) bool {
 			ill.LogRecords().RemoveIf(func(l pdata.LogRecord) bool {
 
-				// find the correct severity sampling level.
-				var selectedSamplingRate *severitySamplingRate
-				for _, ssr := range lsp.samplingRates {
-					if ssr.level > l.SeverityNumber() {
-						break
+				// pick the sampling source.
+				var lidBytes []byte
+				if lsp.traceIdEnabled && !l.TraceID().IsEmpty() {
+					value := l.TraceID().Bytes()
+					lidBytes = value[:]
+				}
+				if lidBytes == nil && lsp.samplingSource != "" {
+					if value, ok := l.Attributes().Get(lsp.samplingSource); ok {
+						lidBytes = value.BytesVal()
+					}
+				}
+				priority := lsp.scaledSamplingRate
+				if lsp.samplingPriority != "" {
+					if localPriority, ok := l.Attributes().Get(lsp.samplingPriority); ok {
+						priority = uint32(localPriority.DoubleVal() * percentageScaleFactor)
 					}
-					selectedSamplingRate = ssr
 				}
 
-				// Create an id for the log record by combining the timestamp and severity text.
-				lidBytes := []byte(l.Timestamp().String() + l.SeverityText())
-				sampled := hash(lidBytes[:], lsp.hashSeed)&bitMaskHashBuckets < selectedSamplingRate.scaledSamplingRate
+				sampled := hash(lidBytes, lsp.hashSeed)&bitMaskHashBuckets < priority
 				return !sampled
 			})
 			// Filter out empty ScopeLogs
diff --git a/processor/probabilisticsamplerprocessor/logprobabilisticsampler_test.go b/processor/probabilisticsamplerprocessor/logprobabilisticsampler_test.go
index 2c340560bbd2..e99cb70d2f77 100644
--- a/processor/probabilisticsamplerprocessor/logprobabilisticsampler_test.go
+++ b/processor/probabilisticsamplerprocessor/logprobabilisticsampler_test.go
@@ -59,18 +59,6 @@ func TestNewLogsProcessor(t *testing.T) {
 				HashSeed:           4321,
 			},
 		},
-		{
-			name:         "with_severity",
-			nextConsumer: consumertest.NewNop(),
-			cfg: &Config{
-				ProcessorSettings:  config.NewProcessorSettings(config.NewComponentID(typeStr)),
-				SamplingPercentage: 13.33,
-				HashSeed:           4321,
-				Severity: []severityPair{
-					{"error", 90},
-				},
-			},
-		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -97,7 +85,7 @@ func TestLogsSampling(t *testing.T) {
 				ProcessorSettings:  config.NewProcessorSettings(config.NewComponentID(typeStr)),
 				SamplingPercentage: 100,
 			},
-			received: 2,
+			received: 100,
 		},
 		{
 			name: "nothing",
@@ -108,23 +96,62 @@ func TestLogsSampling(t *testing.T) {
 			received: 0,
 		},
 		{
-			name: "half",
+			name: "roughly half",
+			cfg: &Config{
+				ProcessorSettings:  config.NewProcessorSettings(config.NewComponentID(typeStr)),
+				SamplingPercentage: 50,
+			},
+			received: 52,
+		},
+		{
+			name: "sampling_source no sampling",
+			cfg: &Config{
+				ProcessorSettings:  config.NewProcessorSettings(config.NewComponentID(typeStr)),
+				SamplingPercentage: 0,
+				TraceIDEnabled:     boolPtr(false),
+				SamplingSource:     "foo",
+			},
+			received: 0,
+		},
+		{
+			name: "sampling_source all sampling",
+			cfg: &Config{
+				ProcessorSettings:  config.NewProcessorSettings(config.NewComponentID(typeStr)),
+				SamplingPercentage: 100,
+				TraceIDEnabled:     boolPtr(false),
+				SamplingSource:     "foo",
+			},
+			received: 100,
+		},
+		{
+			name: "sampling_source sampling",
 			cfg: &Config{
 				ProcessorSettings:  config.NewProcessorSettings(config.NewComponentID(typeStr)),
-				SamplingPercentage: 49,
+				SamplingPercentage: 50,
+				TraceIDEnabled:     boolPtr(false),
+				SamplingSource:     "foo",
 			},
-			received: 1,
+			received: 79,
 		},
 		{
-			name: "nothing_except_errors",
+			name: "sampling_priority",
 			cfg: &Config{
 				ProcessorSettings:  config.NewProcessorSettings(config.NewComponentID(typeStr)),
 				SamplingPercentage: 0,
-				Severity: []severityPair{
-					{"error", 100},
-				},
+				SamplingPriority:   "priority",
 			},
-			received: 1,
+			received: 25,
+		},
+		{
+			name: "sampling_priority with sampling field",
+			cfg: &Config{
+				ProcessorSettings:  config.NewProcessorSettings(config.NewComponentID(typeStr)),
+				SamplingPercentage: 0,
+				TraceIDEnabled:     boolPtr(false),
+				SamplingSource:     "foo",
+				SamplingPriority:   "priority",
+			},
+			received: 25,
 		},
 	}
 	for _, tt := range tests {
@@ -134,13 +161,21 @@ func TestLogsSampling(t *testing.T) {
 			require.NoError(t, err)
 			logs := pdata.NewLogs()
 			lr := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords()
-			for i := 0; i < 5; i++ {
-				first := lr.AppendEmpty()
-				first.SetTimestamp(pdata.Timestamp(time.Unix(1649400860, 0).Unix()))
-				first.SetSeverityNumber(pdata.SeverityNumberDEBUG)
-				second := lr.AppendEmpty()
-				second.SetTimestamp(pdata.Timestamp(time.Unix(12345555432, 0).Unix()))
-				second.SetSeverityNumber(pdata.SeverityNumberERROR)
+			for i := 0; i < 100; i++ {
+				record := lr.AppendEmpty()
+				record.SetTimestamp(pdata.Timestamp(time.Unix(1649400860, 0).Unix()))
+				record.SetSeverityNumber(pdata.SeverityNumberDEBUG)
+				ib := byte(i)
+				traceID := [16]byte{0, 0, 0, 0, 0, 0, 0, 0, ib, ib, ib, ib, ib, ib, ib, ib}
+				record.SetTraceID(pdata.NewTraceID(traceID))
+				// set half of the records with a foo attribute
+				if i%2 == 0 {
+					record.Attributes().InsertBytes("foo", traceID[:])
+				}
+				// set a fourth of the records with a priority attribute
+				if i%4 == 0 {
+					record.Attributes().InsertDouble("priority", 100)
+				}
 			}
 			err = processor.ConsumeLogs(context.Background(), logs)
 			require.NoError(t, err)
@@ -149,7 +184,11 @@ func TestLogsSampling(t *testing.T) {
 			if len(sunk) > 0 && sunk[0].ResourceLogs().Len() > 0 {
 				numReceived = sunk[0].ResourceLogs().At(0).ScopeLogs().At(0).LogRecords().Len()
 			}
-			assert.Equal(t, tt.received*5, numReceived)
+			assert.Equal(t, tt.received, numReceived)
 		})
 	}
 }
+
+func boolPtr(b bool) *bool {
+	return &b
+}
diff --git a/processor/probabilisticsamplerprocessor/testdata/config.yaml b/processor/probabilisticsamplerprocessor/testdata/config.yaml
index 5e8f19233d55..86568d5029cd 100644
--- a/processor/probabilisticsamplerprocessor/testdata/config.yaml
+++ b/processor/probabilisticsamplerprocessor/testdata/config.yaml
@@ -38,16 +38,12 @@ processors:
     # seeds at different layers ensures that sampling rate in each layer work as
     # intended.
     hash_seed: 22
-    # Severity filters override the default sampling_percentage sampling rate.
-    severity:
-      # Override the default sampling_percentage for all log records of severity of error or higher
-      # to keep all logs.
-      - sampling_percentage: 100
-        severity_level: error
-      # Override the default sampling_percentage for all log records of severity of warn or higher
-      # to keep 80% of logs. Note this doesn't override the error logs sampling rate.
-      - sampling_percentage: 80
-        severity_level: warn
+    # sampling_source allows using a log record attribute, designated here by the `foo` key,
+    # to compute the sampling hash of the log record instead of the trace ID, if the trace ID
+    # is absent or trace ID sampling is disabled.
+    sampling_source: "foo"
+    # sampling_priority allows using a log record attribute, designated here by the `bar` key,
+    # as the sampling priority of the log record.
+    sampling_priority: "bar"
 
 exporters:
   nop:
diff --git a/processor/probabilisticsamplerprocessor/testdata/invalid.yaml b/processor/probabilisticsamplerprocessor/testdata/invalid.yaml
index f252ae919cdb..ffd9b1e07d16 100644
--- a/processor/probabilisticsamplerprocessor/testdata/invalid.yaml
+++ b/processor/probabilisticsamplerprocessor/testdata/invalid.yaml
@@ -4,14 +4,8 @@ receivers:
 
 processors:
 
   probabilistic_sampler/logs:
-    sampling_percentage: 15.3
+    sampling_percentage: -15.3
     hash_seed: 22
-    severity:
-      - sampling_percentage: 100
-        severity_level: error
-      # Duplicate severity level sampling rate!
-      # Validation is expected to fail with "severity already used: error".
- - sampling_percentage: 80 - severity_level: error exporters: nop: From b139384765110dab7f42b90f2987796554259d48 Mon Sep 17 00:00:00 2001 From: Antoine Toulme Date: Mon, 11 Apr 2022 14:38:40 -0700 Subject: [PATCH 4/5] fix lint --- .../logprobabilisticsampler.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go b/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go index b4f609c84232..5142509ff2aa 100644 --- a/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go +++ b/processor/probabilisticsamplerprocessor/logprobabilisticsampler.go @@ -26,7 +26,7 @@ import ( type logsamplerprocessor struct { scaledSamplingRate uint32 hashSeed uint32 - traceIdEnabled bool + traceIDEnabled bool samplingSource string samplingPriority string } @@ -38,7 +38,7 @@ func newLogsProcessor(nextConsumer consumer.Logs, cfg *Config) (component.LogsPr lsp := &logsamplerprocessor{ scaledSamplingRate: uint32(cfg.SamplingPercentage * percentageScaleFactor), hashSeed: cfg.HashSeed, - traceIdEnabled: cfg.TraceIDEnabled == nil || *cfg.TraceIDEnabled, + traceIDEnabled: cfg.TraceIDEnabled == nil || *cfg.TraceIDEnabled, samplingPriority: cfg.SamplingPriority, samplingSource: cfg.SamplingSource, } @@ -57,7 +57,7 @@ func (lsp *logsamplerprocessor) processLogs(_ context.Context, ld pdata.Logs) (p // pick the sampling source. var lidBytes []byte - if lsp.traceIdEnabled && !l.TraceID().IsEmpty() { + if lsp.traceIDEnabled && !l.TraceID().IsEmpty() { value := l.TraceID().Bytes() lidBytes = value[:] } From 990212c7776f52c3ee60421eb5bb66083e2cedf1 Mon Sep 17 00:00:00 2001 From: Alex Boten Date: Thu, 14 Apr 2022 08:45:19 -0700 Subject: [PATCH 5/5] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8af241c60ced..095c974726b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ ### 💡 Enhancements 💡 -- `cmd/mdatagen`: Update documentation generated for attributes to list enumerated values and show the "value" that will be visible on metrics when it is different from the attribute key in metadata.yaml (#8983) - `probabilistic_sampler`: Add ability to sample logs (#9118) ### 🧰 Bug fixes 🧰
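
A note for reviewers: the sampling decision in `processLogs` relies on helpers that already exist in this package's trace sampler and therefore never appear in these diffs: `hash`, `percentageScaleFactor`, and `bitMaskHashBuckets`. Below is a minimal, self-contained sketch of the bucketed decision for reference only. The bucket constants are assumptions chosen for illustration, and FNV-1a stands in for the processor's internal hash function, so the numbers it produces will not match the collector's.

```go
package main

import (
	"encoding/binary"
	"fmt"
	"hash/fnv"
)

// Bucket constants assumed to mirror the existing trace sampler's scheme:
// percentages are scaled into a fixed number of hash buckets.
const (
	numHashBuckets        = 0x4000 // 16384 buckets
	bitMaskHashBuckets    = numHashBuckets - 1
	percentageScaleFactor = numHashBuckets / 100.0
)

// keep reports whether a record identified by id survives sampling at the
// given percentage. FNV-1a is a stand-in for the processor's own hash.
func keep(id []byte, seed uint32, percentage float32) bool {
	var seedBytes [4]byte
	binary.BigEndian.PutUint32(seedBytes[:], seed)
	h := fnv.New32a()
	h.Write(seedBytes[:]) // the seed shifts bucket assignment between collector tiers
	h.Write(id)
	scaled := uint32(percentage * percentageScaleFactor)
	return h.Sum32()&bitMaskHashBuckets < scaled
}

func main() {
	kept := 0
	for i := 0; i < 10000; i++ {
		if keep([]byte(fmt.Sprintf("record-%d", i)), 22, 15.3) {
			kept++
		}
	}
	// Expect roughly 15.3% of the records to be kept.
	fmt.Printf("kept %d of 10000\n", kept)
}
```

The point of the scheme is determinism: for a fixed seed and percentage, a given ID always lands in the same bucket, so every collector behind the same load balancer makes the same keep-or-drop decision without coordination, while a different seed at another tier reshuffles records into new buckets.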