From aa0198433a8e7cb90624a0a3b59b65a79471fa9a Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Fri, 28 Jun 2024 16:01:42 +0100 Subject: [PATCH 01/46] added convert_exponential_hist_to_bucketed_hist function --- ...nvert_exponential_hist_to_bucketed_hist.go | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go new file mode 100644 index 000000000000..b9d96b63dda8 --- /dev/null +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go @@ -0,0 +1,144 @@ +package metrics + +import ( + "context" + "fmt" + + "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" +) + +type convertExponentialHistToBucketedHistArguments struct { + ExplicitBounds []float64 +} + +func newConvertExponentialHistToBucketedHistFactory() ottl.Factory[ottlmetric.TransformContext] { + return ottl.NewFactory("convert_exponential_hist_to_bucketed_hist", &convertExponentialHistToBucketedHistArguments{}, createConvertExponentialHistToBucketedHistFunction) +} + +func createConvertExponentialHistToBucketedHistFunction(_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[ottlmetric.TransformContext], error) { + args, ok := oArgs.(*convertExponentialHistToBucketedHistArguments) + + if !ok { + return nil, fmt.Errorf("ConvertExponentialHistToBucketedHistFactory args must be of type *ConvertExponentialHistToBucketedHistArguments") + } + + return convertExponentialHistToBucketedHist(args.ExplicitBounds) +} + +// convertExponentialHistToBucketedHist converts an exponential histogram to 
a bucketed histogram +func convertExponentialHistToBucketedHist(explicitBounds []float64) (ottl.ExprFunc[ottlmetric.TransformContext], error) { + + if len(explicitBounds) == 0 { + return nil, fmt.Errorf("explicit bounds must cannot be empty: %v", explicitBounds) + } + + return func(_ context.Context, tCtx ottlmetric.TransformContext) (any, error) { + metric := tCtx.GetMetric() + if metric.Type() != pmetric.MetricTypeExponentialHistogram { + return nil, nil + } + + // expHist := metric.ExponentialHistogram() + bucketedHist := pmetric.NewHistogram() + dps := metric.ExponentialHistogram().DataPoints() + bucketedHist.SetAggregationTemporality(metric.ExponentialHistogram().AggregationTemporality()) + + for i := 0; i < dps.Len(); i++ { + expDataPoint := dps.At(i) + bucketCounts := calculateBucketCounts(expDataPoint, explicitBounds) + bucketHistDatapoint := bucketedHist.DataPoints().AppendEmpty() + bucketHistDatapoint.SetStartTimestamp(expDataPoint.StartTimestamp()) + bucketHistDatapoint.SetTimestamp(expDataPoint.Timestamp()) + bucketHistDatapoint.SetCount(expDataPoint.Count()) + bucketHistDatapoint.SetSum(expDataPoint.Sum()) + bucketHistDatapoint.SetMin(expDataPoint.Min()) + bucketHistDatapoint.SetMax(expDataPoint.Max()) + bucketHistDatapoint.ExplicitBounds().FromRaw(explicitBounds) + bucketHistDatapoint.BucketCounts().FromRaw(bucketCounts) + expDataPoint.Attributes().CopyTo(bucketHistDatapoint.Attributes()) + } + + // create new metric and override metric + newMetric := pmetric.NewMetric() + newMetric.SetName(metric.Name()) + newMetric.SetDescription(metric.Description()) + newMetric.SetUnit(metric.Unit()) + bucketedHist.CopyTo(newMetric.SetEmptyHistogram()) + newMetric.CopyTo(metric) + + return nil, nil + }, nil +} + +// calculateBucketCounts calculates the bucket counts for the given exponential histogram data point +// func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { +// t := time.Now() +// 
fmt.Println("calculateBucketCounts called!", t) +// defer func() { fmt.Println("calculateBucketCounts returned!", time.Since(t)) }() +// bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket +// scale := dp.Scale() +// currentValue := 1 << scale // 2^scale + +// fmt.Println("scale:", scale) +// fmt.Println("currentValue:", currentValue) +// fmt.Println("len of positive buckets:", dp.Positive().BucketCounts().Len()) + +// // Positive buckets +// for i := 0; i < int(dp.Positive().BucketCounts().Len()); i++ { +// // for _, count := range dataPoint.Positive().BucketCounts() { +// lowerBound := currentValue +// upperBound := currentValue*2 - 1 +// count := dp.Positive().BucketCounts().At(i) + +// fmt.Println(lowerBound, upperBound, count) + +// for value := lowerBound; value <= upperBound; value++ { +// for i, bound := range boundaries { +// if float64(value) <= bound { +// bucketCounts[i] += count +// break +// } +// if i == len(boundaries)-1 { +// bucketCounts[i+1] += count // Overflow bucket +// } +// } +// } + +// currentValue <<= 1 // Multiply by 2 for the next range +// } + +// return bucketCounts +// } + +func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { + bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket + scale := dp.Scale() + cv := 1 << scale // 2^scale + currentValue := float64(cv) + + // Positive buckets + for i := 0; i < int(dp.Positive().BucketCounts().Len()); i++ { + count := dp.Positive().BucketCounts().At(i) + // lowerBound := currentValue + upperBound := currentValue * 2 + + // Find the bucket range and add the count + for j, boundary := range boundaries { + if upperBound <= boundary { + bucketCounts[j] += count + break + } + if j == len(boundaries)-1 { + bucketCounts[j+1] += count // Overflow bucket + } + } + + // Update currentValue to the next power of 2 + currentValue = upperBound + } + + return bucketCounts +} From 
da0d514b4b4acb9499aee3f2ec8e9d0a238f6da5 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Fri, 28 Jun 2024 16:02:13 +0100 Subject: [PATCH 02/46] added feature flag for new convert function --- .../internal/metrics/functions.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/processor/transformprocessor/internal/metrics/functions.go b/processor/transformprocessor/internal/metrics/functions.go index 162e208c362a..f533660d52db 100644 --- a/processor/transformprocessor/internal/metrics/functions.go +++ b/processor/transformprocessor/internal/metrics/functions.go @@ -18,6 +18,12 @@ var useConvertBetweenSumAndGaugeMetricContext = featuregate.GlobalRegistry().Mus featuregate.WithRegisterDescription("When enabled will use metric context for conversion between sum and gauge"), ) +var useConvertExponentialHistogramToBucketedHistogram = featuregate.GlobalRegistry().MustRegister( + "processor.transform.ConvertExponentialHistogramToBucketedHistogram", + featuregate.StageAlpha, + featuregate.WithRegisterDescription("When enabled will convert exponential histograms to bucketed histograms"), +) + func DataPointFunctions() map[string]ottl.Factory[ottldatapoint.TransformContext] { functions := ottlfuncs.StandardFuncs[ottldatapoint.TransformContext]() @@ -60,6 +66,14 @@ func MetricFunctions() map[string]ottl.Factory[ottlmetric.TransformContext] { } } + if useConvertExponentialHistogramToBucketedHistogram.IsEnabled() { + for _, f := range []ottl.Factory[ottlmetric.TransformContext]{ + newConvertExponentialHistToBucketedHistFactory(), + } { + metricFunctions[f.Name()] = f + } + } + for k, v := range metricFunctions { functions[k] = v } From 2bb4c6dc065c50a1761f76ebe9537feb5d205249 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Sun, 30 Jun 2024 12:36:43 +0100 Subject: [PATCH 03/46] added tests for convert_exponential_hist_to_bucketed_hist --- ..._exponential_hist_to_bucketed_hist_test.go | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 
100644 processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist_test.go diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist_test.go new file mode 100644 index 000000000000..bef199033928 --- /dev/null +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist_test.go @@ -0,0 +1,151 @@ +package metrics + +import ( + "strings" + "testing" + "time" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/pmetric" +) + +func Test_convert_exponential_hist_to_bucketed_hist(t *testing.T) { + exponentialHistInput := pmetric.NewMetric() + exponentialHistInput.SetName("response_time") + dp := exponentialHistInput.SetEmptyExponentialHistogram().DataPoints().AppendEmpty() + exponentialHistInput.ExponentialHistogram().SetAggregationTemporality(1) + dp.SetCount(2) + dp.SetScale(7) + dp.SetSum(361) + dp.SetMax(195) + dp.SetMin(166) + + ts := pcommon.NewTimestampFromTime(time.Now()) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.Positive().BucketCounts().Append( + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1) + + nonExponentialHist := func() pmetric.Metric { + m := pmetric.NewMetric() + m.SetName("not-exponentialhist") + m.SetEmptyGauge() + return m + } + + tests := []struct { + name string + input pmetric.Metric + arg []float64 // ExplicitBounds + want func(pmetric.Metric) + }{ + { + name: "convert exponential histogram to bucketed histogram", + input: exponentialHistInput, + arg: []float64{0.0, 10.0, 100.0, 1000.0}, 
+ want: func(metric pmetric.Metric) { + + metric.SetName("response_time") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(2) + dp.SetSum(361) + dp.SetMax(195) + dp.SetMin(166) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 1, 1) + + // set explictbounds + dp.ExplicitBounds().Append(0.0, 10.0, 100.0, 1000.0) + + }, + }, + { + name: "non-expontential histogram given", + arg: []float64{0}, + input: nonExponentialHist(), + want: func(metric pmetric.Metric) { + nonExponentialHist().CopyTo(metric) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := pmetric.NewMetric() + tt.input.CopyTo(metric) + + ctx := ottlmetric.NewTransformContext(metric, pmetric.NewMetricSlice(), pcommon.NewInstrumentationScope(), pcommon.NewResource(), pmetric.NewScopeMetrics(), pmetric.NewResourceMetrics()) + + exprFunc, err := convertExponentialHistToBucketedHist(tt.arg) + assert.NoError(t, err) + _, err = exprFunc(nil, ctx) + assert.NoError(t, err) + + expected := pmetric.NewMetric() + tt.want(expected) + + assert.Equal(t, expected, metric) + }) + } +} + +func Test_convertExponentialHistToBucketedHist_validate(t *testing.T) { + tests := []struct { + name string + sliceExplicitBoundsArgs []float64 + }{ + { + name: "empty explicit bounds", + sliceExplicitBoundsArgs: []float64{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := convertExponentialHistToBucketedHist(tt.sliceExplicitBoundsArgs) + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "explicit bounds must cannot be empty")) + }) + } +} From edcaa144f8982b63f7c378a5ead8345f1c841ede Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Sun, 30 Jun 2024 12:37:04 +0100 Subject: [PATCH 04/46] removed commented code --- ...nvert_exponential_hist_to_bucketed_hist.go 
| 42 +------------------ 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go index b9d96b63dda8..f3463e707dfd 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go @@ -37,6 +37,8 @@ func convertExponentialHistToBucketedHist(explicitBounds []float64) (ottl.ExprFu return func(_ context.Context, tCtx ottlmetric.TransformContext) (any, error) { metric := tCtx.GetMetric() + + // only execute on exponential histograms if metric.Type() != pmetric.MetricTypeExponentialHistogram { return nil, nil } @@ -73,46 +75,6 @@ func convertExponentialHistToBucketedHist(explicitBounds []float64) (ottl.ExprFu }, nil } -// calculateBucketCounts calculates the bucket counts for the given exponential histogram data point -// func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { -// t := time.Now() -// fmt.Println("calculateBucketCounts called!", t) -// defer func() { fmt.Println("calculateBucketCounts returned!", time.Since(t)) }() -// bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket -// scale := dp.Scale() -// currentValue := 1 << scale // 2^scale - -// fmt.Println("scale:", scale) -// fmt.Println("currentValue:", currentValue) -// fmt.Println("len of positive buckets:", dp.Positive().BucketCounts().Len()) - -// // Positive buckets -// for i := 0; i < int(dp.Positive().BucketCounts().Len()); i++ { -// // for _, count := range dataPoint.Positive().BucketCounts() { -// lowerBound := currentValue -// upperBound := currentValue*2 - 1 -// count := dp.Positive().BucketCounts().At(i) - -// fmt.Println(lowerBound, upperBound, count) - -// for value := lowerBound; 
value <= upperBound; value++ { -// for i, bound := range boundaries { -// if float64(value) <= bound { -// bucketCounts[i] += count -// break -// } -// if i == len(boundaries)-1 { -// bucketCounts[i+1] += count // Overflow bucket -// } -// } -// } - -// currentValue <<= 1 // Multiply by 2 for the next range -// } - -// return bucketCounts -// } - func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket scale := dp.Scale() From 7f8878e686866fa2855263814334596fbaeefc09 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 1 Jul 2024 15:00:34 +0200 Subject: [PATCH 05/46] add changelog --- ...stom_func_to_convert_exponential_hist.yaml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .chloggen/transformprocessor_add_custom_func_to_convert_exponential_hist.yaml diff --git a/.chloggen/transformprocessor_add_custom_func_to_convert_exponential_hist.yaml b/.chloggen/transformprocessor_add_custom_func_to_convert_exponential_hist.yaml new file mode 100644 index 000000000000..42ce0e59d7b8 --- /dev/null +++ b/.chloggen/transformprocessor_add_custom_func_to_convert_exponential_hist.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: processor/transform + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [33827] + +# (Optional) One or more lines of additional information to render under the primary note. 
+# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] From 05d6a6ba495f82aff497e452c14708dbe1f5d2d7 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 1 Jul 2024 21:47:27 +0200 Subject: [PATCH 06/46] renamed function adjusted exponential conversion algorithm --- ...vert_exponential_hist_to_explicit_hist.go} | 67 +++++++++++-------- 1 file changed, 39 insertions(+), 28 deletions(-) rename processor/transformprocessor/internal/metrics/{func_convert_exponential_hist_to_bucketed_hist.go => func_convert_exponential_hist_to_explicit_hist.go} (56%) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go similarity index 56% rename from processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go rename to processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index f3463e707dfd..402b1ea4af36 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -3,6 +3,7 @@ package metrics import ( "context" "fmt" + "math" "go.opentelemetry.io/collector/pdata/pmetric" @@ -10,26 +11,26 @@ import ( 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" ) -type convertExponentialHistToBucketedHistArguments struct { +type convertExponentialHistToExplicitHistArguments struct { ExplicitBounds []float64 } -func newConvertExponentialHistToBucketedHistFactory() ottl.Factory[ottlmetric.TransformContext] { - return ottl.NewFactory("convert_exponential_hist_to_bucketed_hist", &convertExponentialHistToBucketedHistArguments{}, createConvertExponentialHistToBucketedHistFunction) +func newconvertExponentialHistToExplicitHistFactory() ottl.Factory[ottlmetric.TransformContext] { + return ottl.NewFactory("convert_exponential_hist_to_explicit_hist", &convertExponentialHistToExplicitHistArguments{}, createconvertExponentialHistToExplicitHistFunction) } -func createConvertExponentialHistToBucketedHistFunction(_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[ottlmetric.TransformContext], error) { - args, ok := oArgs.(*convertExponentialHistToBucketedHistArguments) +func createconvertExponentialHistToExplicitHistFunction(_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[ottlmetric.TransformContext], error) { + args, ok := oArgs.(*convertExponentialHistToExplicitHistArguments) if !ok { - return nil, fmt.Errorf("ConvertExponentialHistToBucketedHistFactory args must be of type *ConvertExponentialHistToBucketedHistArguments") + return nil, fmt.Errorf("convertExponentialHistToExplicitHistFactory args must be of type *convertExponentialHistToExplicitHistArguments") } - return convertExponentialHistToBucketedHist(args.ExplicitBounds) + return convertExponentialHistToExplicitHist(args.ExplicitBounds) } -// convertExponentialHistToBucketedHist converts an exponential histogram to a bucketed histogram -func convertExponentialHistToBucketedHist(explicitBounds []float64) (ottl.ExprFunc[ottlmetric.TransformContext], error) { +// convertExponentialHistToExplicitHist converts an exponential histogram to a bucketed histogram +func 
convertExponentialHistToExplicitHist(explicitBounds []float64) (ottl.ExprFunc[ottlmetric.TransformContext], error) { if len(explicitBounds) == 0 { return nil, fmt.Errorf("explicit bounds must cannot be empty: %v", explicitBounds) @@ -75,31 +76,41 @@ func convertExponentialHistToBucketedHist(explicitBounds []float64) (ottl.ExprFu }, nil } -func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { - bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket - scale := dp.Scale() - cv := 1 << scale // 2^scale - currentValue := float64(cv) - - // Positive buckets - for i := 0; i < int(dp.Positive().BucketCounts().Len()); i++ { - count := dp.Positive().BucketCounts().At(i) - // lowerBound := currentValue - upperBound := currentValue * 2 - - // Find the bucket range and add the count - for j, boundary := range boundaries { - if upperBound <= boundary { +// calculateBucketCounts calculates the bucket counts for a given exponential histogram data point +// the algorithm is based on the OpenTelemetry Collector implementation +// +// - base is calculated as 2^-scale +// +// - the base is then used to calculate the upper bound of the bucket +// which is calculated as base^(index+1) +// +// - the index is calculated, by adding the offset to the positive bucket index +// +// - the upper limit is the exponential of the index+1 times the base +// +// - upper bound is used to determine which of the explicit bounds the bucket count falls into +func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, bounderies []float64) []uint64 { + scale := int(dp.Scale()) + base := math.Ldexp(math.Ln2, -scale) + + // negB := dp.Negative().BucketCounts() + posB := dp.Positive().BucketCounts() + bucketCounts := make([]uint64, len(bounderies)+1) // +1 for the overflow bucket + + for pos := 0; pos < posB.Len(); pos++ { + index := dp.Positive().Offset() + int32(pos) + upper := math.Exp(float64(index+1) * base) + count := 
posB.At(pos) + + for j, boundary := range bounderies { + if upper <= boundary { bucketCounts[j] += count break } - if j == len(boundaries)-1 { + if j == len(bounderies)-1 { bucketCounts[j+1] += count // Overflow bucket } } - - // Update currentValue to the next power of 2 - currentValue = upperBound } return bucketCounts From e9d8affad99339f1814792a22554d6006f4849e8 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 1 Jul 2024 21:48:11 +0200 Subject: [PATCH 07/46] renamed and fixed test --- ...ert_exponential_hist_to_explicit_hist_test.go} | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) rename processor/transformprocessor/internal/metrics/{func_convert_exponential_hist_to_bucketed_hist_test.go => func_convert_exponential_hist_to_explicit_hist_test.go} (86%) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go similarity index 86% rename from processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist_test.go rename to processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index bef199033928..112c15618834 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_bucketed_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -11,7 +11,7 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" ) -func Test_convert_exponential_hist_to_bucketed_hist(t *testing.T) { +func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { exponentialHistInput := pmetric.NewMetric() exponentialHistInput.SetName("response_time") dp := exponentialHistInput.SetEmptyExponentialHistogram().DataPoints().AppendEmpty() @@ -61,6 +61,7 @@ func Test_convert_exponential_hist_to_bucketed_hist(t *testing.T) { 0, 1) + 
dp.Positive().SetOffset(944) nonExponentialHist := func() pmetric.Metric { m := pmetric.NewMetric() m.SetName("not-exponentialhist") @@ -77,7 +78,7 @@ func Test_convert_exponential_hist_to_bucketed_hist(t *testing.T) { { name: "convert exponential histogram to bucketed histogram", input: exponentialHistInput, - arg: []float64{0.0, 10.0, 100.0, 1000.0}, + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, want: func(metric pmetric.Metric) { metric.SetName("response_time") @@ -93,10 +94,10 @@ func Test_convert_exponential_hist_to_bucketed_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 1, 1) + dp.BucketCounts().Append(0, 1, 0, 0, 1, 0) // set explictbounds - dp.ExplicitBounds().Append(0.0, 10.0, 100.0, 1000.0) + dp.ExplicitBounds().Append(160.0, 170.0, 180.0, 190.0, 200.0) }, }, @@ -117,7 +118,7 @@ func Test_convert_exponential_hist_to_bucketed_hist(t *testing.T) { ctx := ottlmetric.NewTransformContext(metric, pmetric.NewMetricSlice(), pcommon.NewInstrumentationScope(), pcommon.NewResource(), pmetric.NewScopeMetrics(), pmetric.NewResourceMetrics()) - exprFunc, err := convertExponentialHistToBucketedHist(tt.arg) + exprFunc, err := convertExponentialHistToExplicitHist(tt.arg) assert.NoError(t, err) _, err = exprFunc(nil, ctx) assert.NoError(t, err) @@ -130,7 +131,7 @@ func Test_convert_exponential_hist_to_bucketed_hist(t *testing.T) { } } -func Test_convertExponentialHistToBucketedHist_validate(t *testing.T) { +func Test_convertExponentialHistToExplicitHist_validate(t *testing.T) { tests := []struct { name string sliceExplicitBoundsArgs []float64 @@ -143,7 +144,7 @@ func Test_convertExponentialHistToBucketedHist_validate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - _, err := convertExponentialHistToBucketedHist(tt.sliceExplicitBoundsArgs) + _, err := convertExponentialHistToExplicitHist(tt.sliceExplicitBoundsArgs) assert.Error(t, err) assert.True(t, 
strings.Contains(err.Error(), "explicit bounds must cannot be empty")) }) From 9ce3395f846c4c335778533e75b81b20988cd420 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 1 Jul 2024 21:48:41 +0200 Subject: [PATCH 08/46] changed function name --- processor/transformprocessor/internal/metrics/functions.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/functions.go b/processor/transformprocessor/internal/metrics/functions.go index f533660d52db..29766b8fc953 100644 --- a/processor/transformprocessor/internal/metrics/functions.go +++ b/processor/transformprocessor/internal/metrics/functions.go @@ -68,7 +68,7 @@ func MetricFunctions() map[string]ottl.Factory[ottlmetric.TransformContext] { if useConvertExponentialHistogramToBucketedHistogram.IsEnabled() { for _, f := range []ottl.Factory[ottlmetric.TransformContext]{ - newConvertExponentialHistToBucketedHistFactory(), + newconvertExponentialHistToExplicitHistFactory(), } { metricFunctions[f.Name()] = f } From 7fa713fcc9f8b39d08dd9502a8fc820d405cb88f Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 1 Jul 2024 22:08:02 +0200 Subject: [PATCH 09/46] updated README --- processor/transformprocessor/README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index daab05325aec..1be24d5ff35c 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -212,6 +212,7 @@ In addition to OTTL functions, the processor defines its own functions to help w - [convert_summary_count_val_to_sum](#convert_summary_count_val_to_sum) - [convert_summary_sum_val_to_sum](#convert_summary_sum_val_to_sum) - [copy_metric](#copy_metric) +- [convert_exponential_hist_to_explicit_hist](#convert_exponential_hist_to_explicit_hist) ### convert_sum_to_gauge @@ -347,6 +348,19 @@ Examples: - `copy_metric(desc="new desc") where description == "old desc"` 
+### convert_exponential_hist_to_explicit_hist + +`convert_exponential_hist_to_explicit_hist([ExplicitBounds])` + +The `convert_exponential_hist_to_explicit_hist` function converts an ExponentialHistogram to an Explicit (_normal_) Histogram. + +`ExplicitBounds` represents the list of bucket boundaries for the new histogram. This argument is __required__ and __cannot be empty__. + +__WARNING:__ + +The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries to minimize this loss. For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. + + ## Examples ### Perform transformation if field does not exist From 1f20a5f89bbc7373f40402cad17a09171b5fc047 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 1 Jul 2024 22:13:13 +0200 Subject: [PATCH 10/46] readme --- processor/transformprocessor/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 1be24d5ff35c..d12b18da68b0 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -360,6 +360,9 @@ __WARNING:__ The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries to minimize this loss. For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. 
+Example: + +- `convert_exponential_hist_to_explicit_hist([10.0, 100.0, 1000.0, 10000.0])` ## Examples From 5e26f7da345f78fff20848271b414188dd12b856 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 1 Jul 2024 22:19:37 +0200 Subject: [PATCH 11/46] added warning about usage --- processor/transformprocessor/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index d12b18da68b0..c1ae831a44f8 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -360,6 +360,8 @@ __WARNING:__ The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries to minimize this loss. For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. +This function should only be used when Exponential Histograms are not suitable for the downstream consumers or if upstream metric sources are unable to generate Explicit Histograms. 
+ Example: - `convert_exponential_hist_to_explicit_hist([10.0, 100.0, 1000.0, 10000.0])` From 911673fd7901c611ae25e0925e6cbbee9f84aeac Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 2 Jul 2024 16:53:35 +0200 Subject: [PATCH 12/46] add more testx --- ..._exponential_hist_to_explicit_hist_test.go | 131 +++++++++++++++++- 1 file changed, 130 insertions(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index 112c15618834..d14cd761f1cb 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -76,7 +76,90 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { want func(pmetric.Metric) }{ { - name: "convert exponential histogram to bucketed histogram", + // having explicit bounds that are all smaller than the exponential histogram's scale + // will results in all the exponential histogram's data points being placed in the overflow bucket + name: "convert exponential histogram to explicit histogram with smaller bounds", + input: exponentialHistInput, + arg: []float64{1.0, 2.0, 3.0, 4.0, 5.0}, + want: func(metric pmetric.Metric) { + + metric.SetName("response_time") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(2) + dp.SetSum(361) + dp.SetMax(195) + dp.SetMin(166) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 0, 0, 2) // expect all counts in the overflow bucket + + // set explictbounds + dp.ExplicitBounds().Append(1.0, 2.0, 3.0, 4.0, 5.0) + + }, + }, + { + // having explicit bounds that are all larger than the 
exponential histogram's scale + // will results in all the exponential histogram's data points being placed in the 1st bucket + name: "convert exponential histogram to explicit histogram with large bounds", + input: exponentialHistInput, + arg: []float64{1000.0, 2000.0, 3000.0, 4000.0, 5000.0}, + want: func(metric pmetric.Metric) { + + metric.SetName("response_time") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(2) + dp.SetSum(361) + dp.SetMax(195) + dp.SetMin(166) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(2, 0, 0, 0, 0, 0) // expect all counts in the 1st bucket + + // set explictbounds + dp.ExplicitBounds().Append(1000.0, 2000.0, 3000.0, 4000.0, 5000.0) + + }, + }, + { + + name: "convert exponential histogram to explicit history", + input: exponentialHistInput, + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + want: func(metric pmetric.Metric) { + + metric.SetName("response_time") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(2) + dp.SetSum(361) + dp.SetMax(195) + dp.SetMin(166) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 1, 0, 0, 1, 0) + + // set explictbounds + dp.ExplicitBounds().Append(160.0, 170.0, 180.0, 190.0, 200.0) + + }, + }, + { + name: "convert exponential histogram to explicit history with 0 scale", input: exponentialHistInput, arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, want: func(metric pmetric.Metric) { @@ -101,6 +184,52 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { }, }, + { + // 0 scale exponential histogram will result in an extremely large upper bound + // resulting in all the counts being in buckets much larger than the explicit bounds + // thus 
all counts will be in the overflow bucket + name: "0 scale expontential histogram given", + input: func() pmetric.Metric { + m := pmetric.NewMetric() + exponentialHistInput.CopyTo(m) + m.ExponentialHistogram().DataPoints().At(0).SetScale(0) + return m + }(), + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + want: func(metric pmetric.Metric) { + metric.SetName("response_time") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(2) + dp.SetSum(361) + dp.SetMax(195) + dp.SetMin(166) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 0, 0, 2) + + // set explictbounds + dp.ExplicitBounds().Append(160.0, 170.0, 180.0, 190.0, 200.0) + }, + }, + { + name: "empty expontential histogram given", + input: func() pmetric.Metric { + m := pmetric.NewMetric() + m.SetName("empty") + m.SetEmptyExponentialHistogram() + return m + }(), + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + want: func(metric pmetric.Metric) { + metric.SetName("empty") + metric.SetEmptyHistogram() + }, + }, { name: "non-expontential histogram given", arg: []float64{0}, From d80c080239490c43768c3cebd74dcc3980da204a Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 2 Jul 2024 16:54:08 +0200 Subject: [PATCH 13/46] updated function comments --- ...nvert_exponential_hist_to_explicit_hist.go | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index 402b1ea4af36..d2b79a499ca9 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -44,11 +44,11 
@@ func convertExponentialHistToExplicitHist(explicitBounds []float64) (ottl.ExprFu return nil, nil } - // expHist := metric.ExponentialHistogram() bucketedHist := pmetric.NewHistogram() dps := metric.ExponentialHistogram().DataPoints() bucketedHist.SetAggregationTemporality(metric.ExponentialHistogram().AggregationTemporality()) + // map over each exponential histogram data point and calculate the bucket counts for i := 0; i < dps.Len(); i++ { expDataPoint := dps.At(i) bucketCounts := calculateBucketCounts(expDataPoint, explicitBounds) @@ -76,30 +76,31 @@ func convertExponentialHistToExplicitHist(explicitBounds []float64) (ottl.ExprFu }, nil } -// calculateBucketCounts calculates the bucket counts for a given exponential histogram data point -// the algorithm is based on the OpenTelemetry Collector implementation +// calculateBucketCounts function calculates the bucket counts for a given exponential histogram data point. +// The algorithm is inspired by the logExponentialHistogramDataPoints function used to Print Exponential Histograms in Otel. +// found here: https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/internal/otlptext/databuffer.go#L144-L201 // -// - base is calculated as 2^-scale +// - factor is calculated as math.Ldexp(math.Ln2, -scale) // -// - the base is then used to calculate the upper bound of the bucket -// which is calculated as base^(index+1) +// - next we iterate the bucket counts and positions (pos) in the exponential histogram datapoint. 
// -// - the index is calculated, by adding the offset to the positive bucket index +// - the index is calculated by adding the exponential offset to the positive bucket position (pos) // -// - the upper limit is the exponential of the index+1 times the base +// - the factor is then used to calculate the upper bound of the bucket which is calculated as +// upper = math.Exp((index+1) * factor) // -// - upper bound is used to determine which of the explicit bounds the bucket count falls into +// - At this point we know that the upper bound represents the highest value that can be in this bucket, so we take the +// upper bound and compare it to each of the explicit boundaries provided by the user until we find a boundary +// that fits, that is, the first instance where upper bound <= explicit boundary. func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, bounderies []float64) []uint64 { scale := int(dp.Scale()) - base := math.Ldexp(math.Ln2, -scale) - - // negB := dp.Negative().BucketCounts() + factor := math.Ldexp(math.Ln2, -scale) posB := dp.Positive().BucketCounts() bucketCounts := make([]uint64, len(bounderies)+1) // +1 for the overflow bucket for pos := 0; pos < posB.Len(); pos++ { index := dp.Positive().Offset() + int32(pos) - upper := math.Exp(float64(index+1) * base) + upper := math.Exp(float64(index+1) * factor) count := posB.At(pos) for j, boundary := range bounderies { From cdd8ee95fac118664c015ee3391a192daae0d258 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 2 Jul 2024 17:05:42 +0200 Subject: [PATCH 14/46] updated feature-gate flag name --- processor/transformprocessor/internal/metrics/functions.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/functions.go b/processor/transformprocessor/internal/metrics/functions.go index 29766b8fc953..31ee1c42715f 100644 --- a/processor/transformprocessor/internal/metrics/functions.go +++ 
b/processor/transformprocessor/internal/metrics/functions.go @@ -18,8 +18,8 @@ var useConvertBetweenSumAndGaugeMetricContext = featuregate.GlobalRegistry().Mus featuregate.WithRegisterDescription("When enabled will use metric context for conversion between sum and gauge"), ) -var useConvertExponentialHistogramToBucketedHistogram = featuregate.GlobalRegistry().MustRegister( - "processor.transform.ConvertExponentialHistogramToBucketedHistogram", +var useConvertExponentialHistogramToExplicitHistogram = featuregate.GlobalRegistry().MustRegister( + "processor.transform.ConvertExponentialHistogramToExplicitHistogram", featuregate.StageAlpha, featuregate.WithRegisterDescription("When enabled will convert exponential histograms to bucketed histograms"), ) @@ -66,7 +66,7 @@ func MetricFunctions() map[string]ottl.Factory[ottlmetric.TransformContext] { } } - if useConvertExponentialHistogramToBucketedHistogram.IsEnabled() { + if useConvertExponentialHistogramToExplicitHistogram.IsEnabled() { for _, f := range []ottl.Factory[ottlmetric.TransformContext]{ newconvertExponentialHistToExplicitHistFactory(), } { From d4f9032d39b64665f57438e1384763aa21feac4e Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 3 Jul 2024 10:57:00 +0200 Subject: [PATCH 15/46] adjust feature gate description --- processor/transformprocessor/internal/metrics/functions.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/functions.go b/processor/transformprocessor/internal/metrics/functions.go index 31ee1c42715f..388bf7c4a761 100644 --- a/processor/transformprocessor/internal/metrics/functions.go +++ b/processor/transformprocessor/internal/metrics/functions.go @@ -21,7 +21,7 @@ var useConvertBetweenSumAndGaugeMetricContext = featuregate.GlobalRegistry().Mus var useConvertExponentialHistogramToExplicitHistogram = featuregate.GlobalRegistry().MustRegister( "processor.transform.ConvertExponentialHistogramToExplicitHistogram", 
featuregate.StageAlpha, - featuregate.WithRegisterDescription("When enabled will convert exponential histograms to bucketed histograms"), + featuregate.WithRegisterDescription("When enabled will use metric context for conversion of exponential histograms to explicit histograms"), ) func DataPointFunctions() map[string]ottl.Factory[ottldatapoint.TransformContext] { From 8baa474f07650481a633960500902bdd12b57972 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Thu, 11 Jul 2024 14:20:46 +0200 Subject: [PATCH 16/46] fixed typos --- .../func_convert_exponential_hist_to_explicit_hist.go | 11 +++++++---- ..._convert_exponential_hist_to_explicit_hist_test.go | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index d2b79a499ca9..d010a0be04ba 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -1,3 +1,6 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package metrics import ( @@ -92,23 +95,23 @@ func convertExponentialHistToExplicitHist(explicitBounds []float64) (ottl.ExprFu // - At this point we know that the upper bound represents the highest value that can be in this bucket, so we take the // upper bound and compare it to each of the explicit boundaries provided by the user until we find a boundary // that fits, that is, the first instance where upper bound <= explicit boundary. 
-func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, bounderies []float64) []uint64 { +func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { scale := int(dp.Scale()) factor := math.Ldexp(math.Ln2, -scale) posB := dp.Positive().BucketCounts() - bucketCounts := make([]uint64, len(bounderies)+1) // +1 for the overflow bucket + bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket for pos := 0; pos < posB.Len(); pos++ { index := dp.Positive().Offset() + int32(pos) upper := math.Exp(float64(index+1) * factor) count := posB.At(pos) - for j, boundary := range bounderies { + for j, boundary := range boundaries { if upper <= boundary { bucketCounts[j] += count break } - if j == len(bounderies)-1 { + if j == len(boundaries)-1 { bucketCounts[j+1] += count // Overflow bucket } } diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index d14cd761f1cb..ad642dce9542 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -1,3 +1,6 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package metrics import ( From d6cf406cf76b38292b614830befd7150f79dc137 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Thu, 11 Jul 2024 17:33:32 +0200 Subject: [PATCH 17/46] added chloggen --- .chloggen/cds-1320.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .chloggen/cds-1320.yaml diff --git a/.chloggen/cds-1320.yaml b/.chloggen/cds-1320.yaml new file mode 100644 index 000000000000..bdca6d2069e8 --- /dev/null +++ b/.chloggen/cds-1320.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for 
release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: 'enhancement' + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: processor/transform + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Add custom function to the transform processor to convert exponential histograms to explicit histograms." + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [33827] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [user] From 9e3cbfa544602aeb53d05d614a4962e9b4fb9fcc Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 15 Jul 2024 13:31:22 +0200 Subject: [PATCH 18/46] removed duplicate change log --- ...stom_func_to_convert_exponential_hist.yaml | 27 ------------------- 1 file changed, 27 deletions(-) delete mode 100644 .chloggen/transformprocessor_add_custom_func_to_convert_exponential_hist.yaml diff --git a/.chloggen/transformprocessor_add_custom_func_to_convert_exponential_hist.yaml b/.chloggen/transformprocessor_add_custom_func_to_convert_exponential_hist.yaml deleted file mode 100644 index 42ce0e59d7b8..000000000000 --- a/.chloggen/transformprocessor_add_custom_func_to_convert_exponential_hist.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Use this changelog template to create an entry for release notes. - -# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' -change_type: enhancement - -# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) -component: processor/transform - -# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). -note: - -# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. -issues: [33827] - -# (Optional) One or more lines of additional information to render under the primary note. -# These lines will be padded with 2 spaces and then inserted directly into the document. -# Use pipe (|) for multiline entries. -subtext: - -# If your change doesn't affect end users or the exported elements of any package, -# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. -# Optional: The change log or logs in which this entry should be included. -# e.g. '[user]' or '[user, api]' -# Include 'user' if the change is relevant to end users. -# Include 'api' if there is a change to a library API. 
-# Default: '[user]' -change_logs: [] From a78729bf142c710eb1d279f55251b6fa650c3fa2 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 31 Jul 2024 00:20:54 +0200 Subject: [PATCH 19/46] updated readme with additional distribution approaches --- processor/transformprocessor/README.md | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index c1ae831a44f8..3f25597d74ad 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -354,17 +354,31 @@ Examples: the `convert_exponential_hist_to_explicit_hist` function converts an ExponentialHistogram to an Explicit (_normal_) Histogram. -`ExplicitBounds` is represents the list of bucket boundaries for the new histogram. This argument is __required__ and __cannot be empty__. +This function requires 2 arguments: + +- `distribution` - This argument defines the conversion algorithm used to distribute the exponential datapoints into a new Explicit Histogram. There are 4 distribution options: +
+ - __upper__ - This approach identifies the highest possible value of each exponential bucket (_the upper bound_) and uses it to distribute the datapoints by comparing the upper bound of each bucket with the ExplicitBounds. This approach works better for small/narrow exponential histograms. +
+ - __midpoint__ - This approach works in a similar way to the __upper__ approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. This approach also works better for small/narrow exponential histograms. +
+ - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the __ExplicitBounds__. This approach works better for large/wide exponential histograms. +
+ - __random__ - This approach distributes the datapoints for each bucket randomly across the __ExplicitBounds__. This approach works better for large/wide exponential histograms. +
+- `ExplicitBounds` represents the list of bucket boundaries for the new histogram. This argument is __required__ and __cannot be empty__. __WARNING:__ -The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries to minimize this loss. For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. +The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries and identify the best distribution approach for your data in order to minimize this loss. + +For example, selecting boundaries that are too high or too low may result in histogram buckets that are too wide or too narrow, respectively. This function should only be used when Exponential Histograms are not suitable for the downstream consumers or if upstream metric sources are unable to generate Explicit Histograms. 
Example: -- `convert_exponential_hist_to_explicit_hist([10.0, 100.0, 1000.0, 10000.0])` +- `convert_exponential_hist_to_explicit_hist("random", [10.0, 100.0, 1000.0, 10000.0])` ## Examples From 1b5558a76339830e2b439894391d50c1314daadc Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 31 Jul 2024 00:21:40 +0200 Subject: [PATCH 20/46] added random, uniform and midpoint distribution functions and updated functionf flow to support --- ...nvert_exponential_hist_to_explicit_hist.go | 198 +++++++++++++++++- 1 file changed, 188 insertions(+), 10 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index d010a0be04ba..62fb7f325757 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -7,17 +7,28 @@ import ( "context" "fmt" "math" + "time" "go.opentelemetry.io/collector/pdata/pmetric" + "golang.org/x/exp/rand" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" ) type convertExponentialHistToExplicitHistArguments struct { + DistributionFn string ExplicitBounds []float64 } +// distributionFnMap - map of conversion functions +var distributionFnMap = map[string]func(pmetric.ExponentialHistogramDataPoint, []float64) []uint64{ + "upper": calculateBucketCountsWithUpperBounds, + "midpoint": calculateBucketCountsWithMidpoint, + "random": calculateBucketCountsWithRandomDistribution, + "uniform": calculateBucketCountsWithUniformDistribution, +} + func newconvertExponentialHistToExplicitHistFactory() ottl.Factory[ottlmetric.TransformContext] { return ottl.NewFactory("convert_exponential_hist_to_explicit_hist", 
&convertExponentialHistToExplicitHistArguments{}, createconvertExponentialHistToExplicitHistFunction) } @@ -29,14 +40,28 @@ func createconvertExponentialHistToExplicitHistFunction(_ ottl.FunctionContext, return nil, fmt.Errorf("convertExponentialHistToExplicitHistFactory args must be of type *convertExponentialHistToExplicitHistArguments") } - return convertExponentialHistToExplicitHist(args.ExplicitBounds) + if len(args.DistributionFn) == 0 { + args.DistributionFn = "upper" + } + + if _, ok := distributionFnMap[args.DistributionFn]; !ok { + return nil, fmt.Errorf("invalid conversion function: %s, must be one of [upper, midpoint, random, uniform]", args.DistributionFn) + + } + + return convertExponentialHistToExplicitHist(args.DistributionFn, args.ExplicitBounds) } // convertExponentialHistToExplicitHist converts an exponential histogram to a bucketed histogram -func convertExponentialHistToExplicitHist(explicitBounds []float64) (ottl.ExprFunc[ottlmetric.TransformContext], error) { +func convertExponentialHistToExplicitHist(distributionFn string, explicitBounds []float64) (ottl.ExprFunc[ottlmetric.TransformContext], error) { if len(explicitBounds) == 0 { - return nil, fmt.Errorf("explicit bounds must cannot be empty: %v", explicitBounds) + return nil, fmt.Errorf("explicit bounds cannot be empty: %v", explicitBounds) + } + + distFn, ok := distributionFnMap[distributionFn] + if !ok { + return nil, fmt.Errorf("invalid conversion function: %s, must be one of [upper, midpoint, random, uniform]", distributionFn) } return func(_ context.Context, tCtx ottlmetric.TransformContext) (any, error) { @@ -54,7 +79,7 @@ func convertExponentialHistToExplicitHist(explicitBounds []float64) (ottl.ExprFu // map over each exponential histogram data point and calculate the bucket counts for i := 0; i < dps.Len(); i++ { expDataPoint := dps.At(i) - bucketCounts := calculateBucketCounts(expDataPoint, explicitBounds) + bucketCounts := distFn(expDataPoint, explicitBounds)
bucketHistDatapoint := bucketedHist.DataPoints().AppendEmpty() bucketHistDatapoint.SetStartTimestamp(expDataPoint.StartTimestamp()) bucketHistDatapoint.SetTimestamp(expDataPoint.Timestamp()) @@ -79,7 +104,7 @@ func convertExponentialHistToExplicitHist(explicitBounds []float64) (ottl.ExprFu }, nil } -// calculateBucketCounts function calculates the bucket counts for a given exponential histogram data point. +// calculateBucketCountsWithUpperBounds function calculates the bucket counts for a given exponential histogram data point. // The algorithm is inspired by the logExponentialHistogramDataPoints function used to Print Exponential Histograms in Otel. // found here: https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/internal/otlptext/databuffer.go#L144-L201 // @@ -91,11 +116,7 @@ func convertExponentialHistToExplicitHist(explicitBounds []float64) (ottl.ExprFu // // - the factor is then used to calculate the upper bound of the bucket which is calculated as // upper = math.Exp((index+1) * factor) -// -// - At this point we know that the upper bound represents the highest value that can be in this bucket, so we take the -// upper bound and compare it to each of the explicit boundaries provided by the user until we find a boundary -// that fits, that is, the first instance where upper bound <= explicit boundary. 
-func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { +func calculateBucketCountsWithUpperBounds(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { scale := int(dp.Scale()) factor := math.Ldexp(math.Ln2, -scale) posB := dp.Positive().BucketCounts() @@ -103,9 +124,13 @@ func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries for pos := 0; pos < posB.Len(); pos++ { index := dp.Positive().Offset() + int32(pos) + // calculate the upper bound of the bucket upper := math.Exp(float64(index+1) * factor) count := posB.At(pos) + // At this point we know that the upper bound represents the highest value that can be in this bucket, so we take the + // upper bound and compare it to each of the explicit boundaries provided by the user until we find a boundary + // that fits, that is, the first instance where upper bound <= explicit boundary. for j, boundary := range boundaries { if upper <= boundary { bucketCounts[j] += count @@ -119,3 +144,156 @@ func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries return bucketCounts } + +// calculateBucketCountsWithMidpoint function calculates the bucket counts for a given exponential histogram data point. +// This algorithm is similar to calculateBucketCountsWithUpperBounds, but instead of using the upper bound of the bucket +// to determine the bucket, it uses the midpoint of the upper and lower bounds. +// The midpoint is calculated as (upper + lower) / 2. 
+func calculateBucketCountsWithMidpoint(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { + scale := int(dp.Scale()) + factor := math.Ldexp(math.Ln2, -scale) + posB := dp.Positive().BucketCounts() + bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket + + for pos := 0; pos < posB.Len(); pos++ { + index := dp.Positive().Offset() + int32(pos) + upper := math.Exp(float64(index+1) * factor) + lower := math.Exp(float64(index) * factor) + midpoint := (upper + lower) / 2 + count := posB.At(pos) + + for j, boundary := range boundaries { + if midpoint <= boundary { + bucketCounts[j] += count + break + } + if j == len(boundaries)-1 { + bucketCounts[j+1] += count // Overflow bucket + } + } + } + + return bucketCounts +} + +// calculateBucketCountsWithUniformDistribution distributes counts from an exponential histogram data point into a set of linear boundaries using uniform distribution +func calculateBucketCountsWithUniformDistribution(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { + scale := int(dp.Scale()) + factor := math.Ldexp(math.Ln2, -scale) + posB := dp.Positive().BucketCounts() + bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket + + for pos := 0; pos < posB.Len(); pos++ { + index := dp.Positive().Offset() + int32(pos) + lower := math.Exp(float64(index) * factor) + upper := math.Exp(float64(index+1) * factor) + count := posB.At(pos) + + // Find the boundaries that intersect with the bucket range + var start, end int + for start = 0; start < len(boundaries); start++ { + if lower <= boundaries[start] { + break + } + } + for end = start; end < len(boundaries); end++ { + if upper <= boundaries[end] { + break + } + } + + // Distribute the count uniformly across the intersecting boundaries + if end > start { + countPerBoundary := count / uint64(end-start+1) + remainder := count % uint64(end-start+1) + + for j := start; j <= end; j++ { + bucketCounts[j] += 
countPerBoundary + if remainder > 0 { + bucketCounts[j]++ + remainder-- + } + } + } else { + // Handle the case where the bucket range does not intersect with any boundaries + bucketCounts[start] += count + } + } + + return bucketCounts +} + +// calculateBucketCountsWithRandomDistribution distributes counts from an exponential histogram data point into a set of linear boundaries using random distribution +func calculateBucketCountsWithRandomDistribution(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { + rand.Seed(uint64(time.Now().UnixNano())) // Seed the random number generator + scale := int(dp.Scale()) + // factor is used to scale the exponential histogram + factor := math.Ldexp(math.Ln2, -scale) + posB := dp.Positive().BucketCounts() + bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket + + for pos := 0; pos < posB.Len(); pos++ { + // Calculate the lower and upper bounds of the current bucket + index := dp.Positive().Offset() + int32(pos) + lower := math.Exp(float64(index) * factor) + upper := math.Exp(float64(index+1) * factor) + count := posB.At(pos) + + // Find the boundaries that intersect with the bucket range + start := 0 + for start < len(boundaries) && boundaries[start] < lower { + start++ + } + end := start + for end < len(boundaries) && boundaries[end] < upper { + end++ + } + + // Randomly distribute the count across the intersecting boundaries + if end > start { + rangeWidth := upper - lower + totalAllocated := uint64(0) + + for j := start; j <= end; j++ { + var boundaryLower, boundaryUpper float64 + if j == 0 { + // For the first boundary, set the lower limit to the bucket's lower bound + boundaryLower = lower + } else { + // Otherwise, set it to the previous boundary + boundaryLower = boundaries[j-1] + } + if j == len(boundaries) { + // For the last boundary, set the upper limit to the bucket's upper bound + boundaryUpper = upper + } else { + // Otherwise, set it to the current boundary + 
boundaryUpper = boundaries[j] + } + + // Calculate the overlap width between the boundary range and the bucket range + overlapWidth := math.Min(boundaryUpper, upper) - math.Max(boundaryLower, lower) + // Proportionally allocate the count based on the overlap width + allocatedCount := uint64(float64(count) * (overlapWidth / rangeWidth)) + + // Randomly assign the counts to the boundaries + randomlyAllocatedCount := uint64(rand.Float64() * float64(allocatedCount)) + bucketCounts[j] += randomlyAllocatedCount + totalAllocated += randomlyAllocatedCount + } + + // Distribute any remaining count + remainingCount := count - totalAllocated + for remainingCount > 0 { + randomBoundary := rand.Intn(end-start+1) + start + bucketCounts[randomBoundary]++ + remainingCount-- + } + } else { + // If the bucket range does not intersect with any boundaries, assign the entire count to the start boundary + bucketCounts[start] += count + } + } + + return bucketCounts +} From a260667e28039aba13b3a86bf75b4230bf1ce0e9 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 31 Jul 2024 00:22:16 +0200 Subject: [PATCH 21/46] added tests for random, uniform and midpoint distribution implementations --- ..._exponential_hist_to_explicit_hist_test.go | 632 +++++++++++++++--- 1 file changed, 549 insertions(+), 83 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index ad642dce9542..cd0163d2739b 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -14,76 +14,82 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" ) -func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { - exponentialHistInput := pmetric.NewMetric() - 
exponentialHistInput.SetName("response_time") - dp := exponentialHistInput.SetEmptyExponentialHistogram().DataPoints().AppendEmpty() - exponentialHistInput.ExponentialHistogram().SetAggregationTemporality(1) - dp.SetCount(2) - dp.SetScale(7) - dp.SetSum(361) - dp.SetMax(195) - dp.SetMin(166) +var nonExponentialHist = func() pmetric.Metric { + m := pmetric.NewMetric() + m.SetName("not-exponentialhist") + m.SetEmptyGauge() + return m +} +func TestUpper_convert_exponential_hist_to_explicit_hist(t *testing.T) { ts := pcommon.NewTimestampFromTime(time.Now()) - dp.SetTimestamp(ts) - - // set attributes - dp.Attributes().PutStr("metric_type", "timing") - - // set bucket counts - dp.Positive().BucketCounts().Append( - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1) - - dp.Positive().SetOffset(944) - nonExponentialHist := func() pmetric.Metric { - m := pmetric.NewMetric() - m.SetName("not-exponentialhist") - m.SetEmptyGauge() - return m + defaultTestMetric := func() pmetric.Metric { + exponentialHistInput := pmetric.NewMetric() + exponentialHistInput.SetName("response_time") + dp := exponentialHistInput.SetEmptyExponentialHistogram().DataPoints().AppendEmpty() + exponentialHistInput.ExponentialHistogram().SetAggregationTemporality(1) + dp.SetCount(2) + dp.SetScale(7) + dp.SetSum(361) + dp.SetMax(195) + dp.SetMin(166) + + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.Positive().BucketCounts().Append( + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1) + + dp.Positive().SetOffset(944) + return exponentialHistInput } tests := []struct { - name string - input pmetric.Metric - arg []float64 // ExplicitBounds - want func(pmetric.Metric) + name string + input func() pmetric.Metric + arg []float64 // 
ExplicitBounds + distribution string + want func(pmetric.Metric) }{ { // having explicit bounds that are all smaller than the exponential histogram's scale // will results in all the exponential histogram's data points being placed in the overflow bucket - name: "convert exponential histogram to explicit histogram with smaller bounds", - input: exponentialHistInput, - arg: []float64{1.0, 2.0, 3.0, 4.0, 5.0}, + name: "convert exponential histogram to explicit histogram with smaller bounds with upper distribute", + input: defaultTestMetric, + arg: []float64{1.0, 2.0, 3.0, 4.0, 5.0}, + distribution: "upper", want: func(metric pmetric.Metric) { metric.SetName("response_time") @@ -109,9 +115,10 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { { // having explicit bounds that are all larger than the exponential histogram's scale // will results in all the exponential histogram's data points being placed in the 1st bucket - name: "convert exponential histogram to explicit histogram with large bounds", - input: exponentialHistInput, - arg: []float64{1000.0, 2000.0, 3000.0, 4000.0, 5000.0}, + name: "convert exponential histogram to explicit histogram with large bounds", + input: defaultTestMetric, + arg: []float64{1000.0, 2000.0, 3000.0, 4000.0, 5000.0}, + distribution: "upper", want: func(metric pmetric.Metric) { metric.SetName("response_time") @@ -136,9 +143,10 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { }, { - name: "convert exponential histogram to explicit history", - input: exponentialHistInput, - arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + name: "convert exponential histogram to explicit history", + input: defaultTestMetric, + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + distribution: "upper", want: func(metric pmetric.Metric) { metric.SetName("response_time") @@ -162,9 +170,10 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { }, }, { - name: "convert exponential histogram to 
explicit history with 0 scale", - input: exponentialHistInput, - arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + name: "convert exponential histogram to explicit history with 0 scale", + input: defaultTestMetric, + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + distribution: "upper", want: func(metric pmetric.Metric) { metric.SetName("response_time") @@ -191,14 +200,15 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { // 0 scale exponential histogram will result in an extremely large upper bound // resulting in all the counts being in buckets much larger than the explicit bounds // thus all counts will be in the overflow bucket - name: "0 scale expontential histogram given", + name: "0 scale expontential histogram given using upper distribute", input: func() pmetric.Metric { m := pmetric.NewMetric() - exponentialHistInput.CopyTo(m) + defaultTestMetric().CopyTo(m) m.ExponentialHistogram().DataPoints().At(0).SetScale(0) return m - }(), - arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + }, + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + distribution: "upper", want: func(metric pmetric.Metric) { metric.SetName("response_time") dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() @@ -219,6 +229,165 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.ExplicitBounds().Append(160.0, 170.0, 180.0, 190.0, 200.0) }, }, + { + name: "empty expontential histogram given using upper distribute", + input: func() pmetric.Metric { + m := pmetric.NewMetric() + m.SetName("empty") + m.SetEmptyExponentialHistogram() + return m + }, + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + distribution: "upper", + want: func(metric pmetric.Metric) { + metric.SetName("empty") + metric.SetEmptyHistogram() + }, + }, + { + name: "non-expontential histogram", + arg: []float64{0}, + distribution: "upper", + input: nonExponentialHist, + want: func(metric pmetric.Metric) { + nonExponentialHist().CopyTo(metric) + }, + }, + } + + 
for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := pmetric.NewMetric() + tt.input().CopyTo(metric) + + ctx := ottlmetric.NewTransformContext(metric, pmetric.NewMetricSlice(), pcommon.NewInstrumentationScope(), pcommon.NewResource(), pmetric.NewScopeMetrics(), pmetric.NewResourceMetrics()) + + exprFunc, err := convertExponentialHistToExplicitHist(tt.distribution, tt.arg) + assert.NoError(t, err) + _, err = exprFunc(nil, ctx) + assert.NoError(t, err) + + expected := pmetric.NewMetric() + tt.want(expected) + + assert.Equal(t, expected, metric) + }) + } +} + +func TestMidpoint_convert_exponential_hist_to_explicit_hist(t *testing.T) { + ts := pcommon.NewTimestampFromTime(time.Now()) + defaultTestMetric := func() pmetric.Metric { + m := pmetric.NewMetric() + m.SetName("test-metric") + dp := m.SetEmptyExponentialHistogram().DataPoints().AppendEmpty() + m.ExponentialHistogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetScale(0) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + + dp.SetTimestamp(ts) + + dp.Attributes().PutStr("metric_type", "timing") + dp.Positive().SetOffset(5) + dp.Positive().BucketCounts().FromRaw([]uint64{10, 22, 12}) + return m + } + + tests := []struct { + name string + input func() pmetric.Metric + arg []float64 // ExplicitBounds + distribution string + want func(pmetric.Metric) + }{ + { + // having explicit bounds that are all smaller than the exponential histogram's scale + // will results in all the exponential histogram's data points being placed in the overflow bucket + name: "convert exponential histogram to explicit histogram with smaller bounds", + input: defaultTestMetric, + arg: []float64{1.0, 2.0, 3.0, 4.0, 5.0}, + distribution: "midpoint", + want: func(metric pmetric.Metric) { + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + 
dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 0, 0, 44) // expect all counts in the overflow bucket + + // set explictbounds + dp.ExplicitBounds().Append(1.0, 2.0, 3.0, 4.0, 5.0) + + }, + }, + { + // having explicit bounds that are all larger than the exponential histogram's scale + // will results in all the exponential histogram's data points being placed in the 1st bucket + name: "convert exponential histogram to explicit histogram with large bounds", + input: defaultTestMetric, + arg: []float64{1000.0, 2000.0, 3000.0, 4000.0, 5000.0}, + distribution: "midpoint", + want: func(metric pmetric.Metric) { + + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(44, 0, 0, 0, 0, 0) // expect all counts in the 1st bucket + + // set explictbounds + dp.ExplicitBounds().Append(1000.0, 2000.0, 3000.0, 4000.0, 5000.0) + + }, + }, + { + + name: "convert exponential histogram to explicit hist", + input: defaultTestMetric, + arg: []float64{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}, + distribution: "midpoint", + want: func(metric pmetric.Metric) { + + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 0, 10, 0, 0, 0, 0, 22, 12) + + // set explictbounds + dp.ExplicitBounds().Append(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0) + + }, + 
}, { name: "empty expontential histogram given", input: func() pmetric.Metric { @@ -226,17 +395,19 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { m.SetName("empty") m.SetEmptyExponentialHistogram() return m - }(), - arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + }, + arg: []float64{160.0, 170.0, 180.0, 190.0, 200.0}, + distribution: "midpoint", want: func(metric pmetric.Metric) { metric.SetName("empty") metric.SetEmptyHistogram() }, }, { - name: "non-expontential histogram given", - arg: []float64{0}, - input: nonExponentialHist(), + name: "non-expontential histogram given using upper distribute", + arg: []float64{0}, + distribution: "midpoint", + input: nonExponentialHist, want: func(metric pmetric.Metric) { nonExponentialHist().CopyTo(metric) }, @@ -246,11 +417,11 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { metric := pmetric.NewMetric() - tt.input.CopyTo(metric) + tt.input().CopyTo(metric) ctx := ottlmetric.NewTransformContext(metric, pmetric.NewMetricSlice(), pcommon.NewInstrumentationScope(), pcommon.NewResource(), pmetric.NewScopeMetrics(), pmetric.NewResourceMetrics()) - exprFunc, err := convertExponentialHistToExplicitHist(tt.arg) + exprFunc, err := convertExponentialHistToExplicitHist(tt.distribution, tt.arg) assert.NoError(t, err) _, err = exprFunc(nil, ctx) assert.NoError(t, err) @@ -263,6 +434,301 @@ func Test_convert_exponential_hist_to_explicit_hist(t *testing.T) { } } +func TestUniforn_convert_exponential_hist_to_explicit_hist(t *testing.T) { + ts := pcommon.NewTimestampFromTime(time.Now()) + defaultTestMetric := func() pmetric.Metric { + m := pmetric.NewMetric() + m.SetName("test-metric") + dp := m.SetEmptyExponentialHistogram().DataPoints().AppendEmpty() + m.ExponentialHistogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetScale(0) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + + dp.SetTimestamp(ts) + + 
dp.Attributes().PutStr("metric_type", "timing") + dp.Positive().SetOffset(5) + dp.Positive().BucketCounts().FromRaw([]uint64{10, 22, 12}) + return m + } + + tests := []struct { + name string + input func() pmetric.Metric + arg []float64 // ExplicitBounds + distribution string + want func(pmetric.Metric) + }{ + { + // having explicit bounds that are all smaller than the exponential histogram's scale + // will results in all the exponential histogram's data points being placed in the overflow bucket + name: "convert exponential histogram to explicit histogram with smaller bounds", + input: defaultTestMetric, + arg: []float64{1.0, 2.0, 3.0, 4.0, 5.0}, + distribution: "uniform", + want: func(metric pmetric.Metric) { + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 0, 0, 44) // expect all counts in the overflow bucket + + // set explictbounds + dp.ExplicitBounds().Append(1.0, 2.0, 3.0, 4.0, 5.0) + + }, + }, + { + // having explicit bounds that are all larger than the exponential histogram's scale + // will results in all the exponential histogram's data points being placed in the 1st bucket + name: "convert exponential histogram to explicit histogram with large bounds", + input: defaultTestMetric, + arg: []float64{1000.0, 2000.0, 3000.0, 4000.0, 5000.0}, + distribution: "uniform", + want: func(metric pmetric.Metric) { + + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + 
dp.BucketCounts().Append(44, 0, 0, 0, 0, 0) // expect all counts in the 1st bucket + + // set explictbounds + dp.ExplicitBounds().Append(1000.0, 2000.0, 3000.0, 4000.0, 5000.0) + + }, + }, + { + + name: "convert exponential histogram to explicit hist", + input: defaultTestMetric, + arg: []float64{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}, + distribution: "uniform", + want: func(metric pmetric.Metric) { + + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 3, 3, 2, 7, 5, 4, 4, 16) + + // set explictbounds + dp.ExplicitBounds().Append(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0) + + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := pmetric.NewMetric() + tt.input().CopyTo(metric) + + ctx := ottlmetric.NewTransformContext(metric, pmetric.NewMetricSlice(), pcommon.NewInstrumentationScope(), pcommon.NewResource(), pmetric.NewScopeMetrics(), pmetric.NewResourceMetrics()) + + exprFunc, err := convertExponentialHistToExplicitHist(tt.distribution, tt.arg) + assert.NoError(t, err) + _, err = exprFunc(nil, ctx) + assert.NoError(t, err) + + expected := pmetric.NewMetric() + tt.want(expected) + + assert.Equal(t, expected, metric) + }) + } +} + +func TestRandom_convert_exponential_hist_to_explicit_hist(t *testing.T) { + ts := pcommon.NewTimestampFromTime(time.Now()) + defaultTestMetric := func() pmetric.Metric { + m := pmetric.NewMetric() + m.SetName("test-metric") + dp := m.SetEmptyExponentialHistogram().DataPoints().AppendEmpty() + m.ExponentialHistogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetScale(0) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + + dp.SetTimestamp(ts) 
+ + dp.Attributes().PutStr("metric_type", "timing") + dp.Positive().SetOffset(5) + dp.Positive().BucketCounts().FromRaw([]uint64{10, 22, 12}) + return m + } + + tests := []struct { + name string + input func() pmetric.Metric + arg []float64 // ExplicitBounds + distribution string + want func(pmetric.Metric) + }{ + { + // having explicit bounds that are all smaller than the exponential histogram's scale + // will results in all the exponential histogram's data points being placed in the overflow bucket + name: "convert exponential histogram to explicit histogram with smaller bounds", + input: defaultTestMetric, + arg: []float64{1.0, 2.0, 3.0, 4.0, 5.0}, + distribution: "random", + want: func(metric pmetric.Metric) { + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 0, 0, 44) // expect all counts in the overflow bucket + + // set explictbounds + dp.ExplicitBounds().Append(1.0, 2.0, 3.0, 4.0, 5.0) + + }, + }, + { + // having explicit bounds that are all larger than the exponential histogram's scale + // will results in all the exponential histogram's data points being placed in the 1st bucket + name: "convert exponential histogram to explicit histogram with large bounds", + input: defaultTestMetric, + arg: []float64{1000.0, 2000.0, 3000.0, 4000.0, 5000.0}, + distribution: "random", + want: func(metric pmetric.Metric) { + + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + 
dp.BucketCounts().Append(44, 0, 0, 0, 0, 0) // expect all counts in the 1st bucket + + // set explictbounds + dp.ExplicitBounds().Append(1000.0, 2000.0, 3000.0, 4000.0, 5000.0) + + }, + }, + { + + name: "convert exponential histogram to explicit hist", + input: defaultTestMetric, + arg: []float64{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}, + distribution: "random", + want: func(metric pmetric.Metric) { + + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(0, 0, 0, 3, 3, 2, 7, 5, 4, 4, 16) + + // set explictbounds + dp.ExplicitBounds().Append(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := pmetric.NewMetric() + tt.input().CopyTo(metric) + + ctx := ottlmetric.NewTransformContext(metric, pmetric.NewMetricSlice(), pcommon.NewInstrumentationScope(), pcommon.NewResource(), pmetric.NewScopeMetrics(), pmetric.NewResourceMetrics()) + + exprFunc, err := convertExponentialHistToExplicitHist(tt.distribution, tt.arg) + assert.NoError(t, err) + _, err = exprFunc(nil, ctx) + assert.NoError(t, err) + + expected := pmetric.NewMetric() + tt.want(expected) + + // since the bucket counts are randomly distributed, we can't predict the exact output + // thus we only check if the metric dimensions are as expected. 
+ if tt.name == "convert exponential histogram to explicit hist" { + expectedDp := expected.Histogram().DataPoints().At(0) + dp := metric.Histogram().DataPoints().At(0) + assert.Equal(t, + expectedDp.BucketCounts().Len(), + dp.BucketCounts().Len()) + + var count uint64 + for i := 0; i < dp.BucketCounts().Len(); i++ { + count += dp.BucketCounts().At(i) + } + + assert.Equal(t, expectedDp.Count(), count) + assert.Equal(t, expectedDp.ExplicitBounds().Len(), dp.ExplicitBounds().Len()) + + // even though the distribution is random, we know that for this + // particular test case, the min value is 40, therefore the 1st 3 buckets + // counts should be 0, as they represent values 10 - 30 + for i := 0; i < 3; i++ { + assert.Equal(t, uint64(0), dp.BucketCounts().At(i), "bucket %d", i) + } + return + } + + assert.Equal(t, expected, metric) + }) + } +} + func Test_convertExponentialHistToExplicitHist_validate(t *testing.T) { tests := []struct { name string @@ -276,9 +742,9 @@ func Test_convertExponentialHistToExplicitHist_validate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - _, err := convertExponentialHistToExplicitHist(tt.sliceExplicitBoundsArgs) + _, err := convertExponentialHistToExplicitHist("random", tt.sliceExplicitBoundsArgs) assert.Error(t, err) - assert.True(t, strings.Contains(err.Error(), "explicit bounds must cannot be empty")) + assert.True(t, strings.Contains(err.Error(), "explicit bounds cannot be empty")) }) } } From ba8b74235ab8520c92e5e8a972554bec81c45598 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 31 Jul 2024 10:54:56 +0200 Subject: [PATCH 22/46] added overflow assertion to random dist test --- .../func_convert_exponential_hist_to_explicit_hist_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go 
b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index cd0163d2739b..81c7d1847477 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -716,11 +716,16 @@ func TestRandom_convert_exponential_hist_to_explicit_hist(t *testing.T) { assert.Equal(t, expectedDp.ExplicitBounds().Len(), dp.ExplicitBounds().Len()) // even though the distribution is random, we know that for this - // particular test case, the min value is 40, therefore the 1st 3 buckets + // particular test case, the min value is 40, therefore the 1st 3 bucket // counts should be 0, as they represent values 10 - 30 for i := 0; i < 3; i++ { assert.Equal(t, uint64(0), dp.BucketCounts().At(i), "bucket %d", i) } + + // since the max value in the exponential histogram is 245 + // we can assert that the overflow bucket has a count > 0 + overflow := dp.BucketCounts().At(dp.BucketCounts().Len() - 1) + assert.Greater(t, overflow, uint64(0), "overflow bucket count should be > 0") return } From c5db9284a0c2fda245d119541e34d3e898c93921 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 31 Jul 2024 11:32:24 +0200 Subject: [PATCH 23/46] updated readme --- processor/transformprocessor/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 3f25597d74ad..56f845746c73 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -350,21 +350,21 @@ Examples: ### convert_exponential_hist_to_explicit_hist -`convert_exponential_hist_to_explicit_hist([ExplicitBounds])` +`convert_exponential_hist_to_explicit_hist(distribution, [ExplicitBounds])` -the `convert_exponential_hist_to_explicit_hist` function converts an ExponentialHistogram to an Explicit 
(_normal_) Histogram. +The `convert_exponential_hist_to_explicit_hist` function converts an ExponentialHistogram to an Explicit (_normal_) Histogram. This function requires 2 arguments: -- `distribution` - This argument is defines the convertion algorithm used to distribute the exponential datapoints into a new Explicit Histogram. There are 4 distribution options: +- `distribution` - This argument defines the conversion algorithm used to distribute the exponential datapoints into a new Explicit Histogram. There are 4 distribution options:
- - __upper__ - This approach identifies the highest possible value of each exponential bucket (_the upper bound_) and uses it to distribute the datapoints by comparing the upper bound of each bucket with the ExplicitBounds. This approach works better for small/narrow exponential histograms. + - __upper__ - This approach identifies the highest possible value of each exponential bucket (_the upper bound_) and uses it to distribute the datapoints by comparing the upper bound of each bucket with the ExplicitBounds provided. This approach works better for small/narrow exponential histograms where the difference between the upper bounds and lower bounds is small.
- - __midpoint__ - This approach works in a similar way to the __upper__ approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. This approach also works better for small/narrow exponential histograms. + - __midpoint__ - This approach works in a similar way to the __upper__ approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. The midpoint is identified by calculating the average of the upper and lower bounds. This approach also works better for small/narrow exponential histograms.
- - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the __ExplicitBounds__. This approach works better for large/wide exponential histograms. + - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the overlapping __ExplicitBounds__. This approach works better for large/wide exponential histograms.
- - __random__ - This approach distributes the datapoints for each bucket randomly across the __ExplicitBounds__. This approach works better for large/wide exponential histograms. + - __random__ - This approach distributes the datapoints for each bucket randomly across the overlapping __ExplicitBounds__. This approach works better for large/wide exponential histograms.
- `ExplicitBounds` represents the list of bucket boundaries for the new histogram. This argument is __required__ and __cannot be empty__. From bfc4ed91ed56ecf007cd7ba077884ebfb860cbab Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 7 Aug 2024 19:20:27 +0200 Subject: [PATCH 24/46] fix default --- .../metrics/func_convert_exponential_hist_to_explicit_hist.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index 62fb7f325757..f980f9cc4d9e 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -41,7 +41,7 @@ func createconvertExponentialHistToExplicitHistFunction(_ ottl.FunctionContext, } if len(args.DistributionFn) == 0 { - args.DistributionFn = "upper_bound" + args.DistributionFn = "upper" } if _, ok := distributionFnMap[args.DistributionFn]; !ok { From 5868330d24ac4b727e7c08447c8219564afc482c Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 7 Aug 2024 19:20:46 +0200 Subject: [PATCH 25/46] update default --- .../metrics/func_convert_exponential_hist_to_explicit_hist.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index f980f9cc4d9e..3ea3d7c835e7 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -41,7 +41,7 @@ func createconvertExponentialHistToExplicitHistFunction(_ ottl.FunctionContext, } if len(args.DistributionFn) 
== 0 { - args.DistributionFn = "upper" + args.DistributionFn = "random" } if _, ok := distributionFnMap[args.DistributionFn]; !ok { From a5e094f96ee56678873f2587af1c2ba42682b4c5 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Thu, 8 Aug 2024 10:29:29 +0200 Subject: [PATCH 26/46] removed featuregate --- .../internal/metrics/functions.go | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/functions.go b/processor/transformprocessor/internal/metrics/functions.go index c32bd7c0babf..7d43491319c2 100644 --- a/processor/transformprocessor/internal/metrics/functions.go +++ b/processor/transformprocessor/internal/metrics/functions.go @@ -18,12 +18,6 @@ var useConvertBetweenSumAndGaugeMetricContext = featuregate.GlobalRegistry().Mus featuregate.WithRegisterDescription("When enabled will use metric context for conversion between sum and gauge"), ) -var useConvertExponentialHistogramToExplicitHistogram = featuregate.GlobalRegistry().MustRegister( - "processor.transform.ConvertExponentialHistogramToExplicitHistogram", - featuregate.StageAlpha, - featuregate.WithRegisterDescription("When enabled will use metric context for conversion of exponential histograms to explicit histograms"), -) - func DataPointFunctions() map[string]ottl.Factory[ottldatapoint.TransformContext] { functions := ottlfuncs.StandardFuncs[ottldatapoint.TransformContext]() @@ -57,6 +51,7 @@ func MetricFunctions() map[string]ottl.Factory[ottlmetric.TransformContext] { newCopyMetricFactory(), newScaleMetricFactory(), newAggregateOnAttributesFactory(), + newconvertExponentialHistToExplicitHistFactory(), ) if useConvertBetweenSumAndGaugeMetricContext.IsEnabled() { @@ -68,14 +63,6 @@ func MetricFunctions() map[string]ottl.Factory[ottlmetric.TransformContext] { } } - if useConvertExponentialHistogramToExplicitHistogram.IsEnabled() { - for _, f := range []ottl.Factory[ottlmetric.TransformContext]{ - 
newconvertExponentialHistToExplicitHistFactory(), - } { - metricFunctions[f.Name()] = f - } - } - for k, v := range metricFunctions { functions[k] = v } From 12ad3356d6e1ab820390005c99a9d2cc69bee504 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 13 Aug 2024 17:35:22 +0200 Subject: [PATCH 27/46] add function to test --- processor/transformprocessor/internal/metrics/functions_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/processor/transformprocessor/internal/metrics/functions_test.go b/processor/transformprocessor/internal/metrics/functions_test.go index 2ea2da3eea89..21212157ad74 100644 --- a/processor/transformprocessor/internal/metrics/functions_test.go +++ b/processor/transformprocessor/internal/metrics/functions_test.go @@ -39,6 +39,7 @@ func Test_MetricFunctions(t *testing.T) { expected["extract_count_metric"] = newExtractCountMetricFactory() expected["copy_metric"] = newCopyMetricFactory() expected["scale_metric"] = newScaleMetricFactory() + expected["convert_exponential_hist_to_explicit_hist"] = newconvertExponentialHistToExplicitHistFactory() defer testutil.SetFeatureGateForTest(t, useConvertBetweenSumAndGaugeMetricContext, true)() actual := MetricFunctions() From e50619beceddc466064e684c0c2636f6200eeee0 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 13 Aug 2024 17:55:06 +0200 Subject: [PATCH 28/46] updated tests --- ..._exponential_hist_to_explicit_hist_test.go | 60 ++++++++++++++----- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index 81c7d1847477..4f16562f8ea0 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -105,7 +105,7 @@ func 
TestUpper_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 0, 0, 2) // expect all counts in the overflow bucket + dp.BucketCounts().Append(0, 0, 0, 0, 2) // expect all counts in the overflow bucket // set explictbounds dp.ExplicitBounds().Append(1.0, 2.0, 3.0, 4.0, 5.0) @@ -134,7 +134,7 @@ func TestUpper_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(2, 0, 0, 0, 0, 0) // expect all counts in the 1st bucket + dp.BucketCounts().Append(2, 0, 0, 0, 0) // expect all counts in the 1st bucket // set explictbounds dp.ExplicitBounds().Append(1000.0, 2000.0, 3000.0, 4000.0, 5000.0) @@ -162,7 +162,7 @@ func TestUpper_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 1, 0, 0, 1, 0) + dp.BucketCounts().Append(1, 0, 0, 1, 0) // set explictbounds dp.ExplicitBounds().Append(160.0, 170.0, 180.0, 190.0, 200.0) @@ -189,7 +189,7 @@ func TestUpper_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 1, 0, 0, 1, 0) + dp.BucketCounts().Append(1, 0, 0, 1, 0) // set explictbounds dp.ExplicitBounds().Append(160.0, 170.0, 180.0, 190.0, 200.0) @@ -223,7 +223,7 @@ func TestUpper_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 0, 0, 2) + dp.BucketCounts().Append(0, 0, 0, 0, 2) // set explictbounds dp.ExplicitBounds().Append(160.0, 170.0, 180.0, 190.0, 200.0) @@ -324,7 +324,7 @@ func TestMidpoint_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 0, 
0, 44) // expect all counts in the overflow bucket + dp.BucketCounts().Append(0, 0, 0, 0, 44) // expect all counts in the overflow bucket // set explictbounds dp.ExplicitBounds().Append(1.0, 2.0, 3.0, 4.0, 5.0) @@ -353,7 +353,7 @@ func TestMidpoint_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(44, 0, 0, 0, 0, 0) // expect all counts in the 1st bucket + dp.BucketCounts().Append(44, 0, 0, 0, 0) // expect all counts in the 1st bucket // set explictbounds dp.ExplicitBounds().Append(1000.0, 2000.0, 3000.0, 4000.0, 5000.0) @@ -381,13 +381,45 @@ func TestMidpoint_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 0, 10, 0, 0, 0, 0, 22, 12) + dp.BucketCounts().Append(0, 0, 0, 10, 0, 0, 0, 0, 22, 12) // set explictbounds dp.ExplicitBounds().Append(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0) }, }, + { + + name: "convert exponential histogram to explicit hist with zero count", + input: func() pmetric.Metric { + m := defaultTestMetric() + m.ExponentialHistogram().DataPoints().At(0).SetZeroCount(5) + return m + }, + arg: []float64{0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}, + distribution: "midpoint", + want: func(metric pmetric.Metric) { + + metric.SetName("test-metric") + dp := metric.SetEmptyHistogram().DataPoints().AppendEmpty() + metric.Histogram().SetAggregationTemporality(1) + dp.SetCount(44) + dp.SetSum(999) + dp.SetMax(245) + dp.SetMin(40) + dp.SetTimestamp(ts) + + // set attributes + dp.Attributes().PutStr("metric_type", "timing") + + // set bucket counts + dp.BucketCounts().Append(5, 0, 0, 0, 10, 0, 0, 0, 0, 22, 12) + + // set explictbounds + dp.ExplicitBounds().Append(0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0) + + }, + }, { name: "empty expontential histogram given", input: func() 
pmetric.Metric { @@ -483,7 +515,7 @@ func TestUniforn_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 0, 0, 44) // expect all counts in the overflow bucket + dp.BucketCounts().Append(0, 0, 0, 0, 44) // expect all counts in the overflow bucket // set explictbounds dp.ExplicitBounds().Append(1.0, 2.0, 3.0, 4.0, 5.0) @@ -512,7 +544,7 @@ func TestUniforn_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(44, 0, 0, 0, 0, 0) // expect all counts in the 1st bucket + dp.BucketCounts().Append(44, 0, 0, 0, 0) // expect all counts in the 1st bucket // set explictbounds dp.ExplicitBounds().Append(1000.0, 2000.0, 3000.0, 4000.0, 5000.0) @@ -540,7 +572,7 @@ func TestUniforn_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 3, 3, 2, 7, 5, 4, 4, 16) + dp.BucketCounts().Append(0, 0, 0, 3, 3, 2, 8, 6, 5, 17) // set explictbounds dp.ExplicitBounds().Append(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0) @@ -618,7 +650,7 @@ func TestRandom_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 0, 0, 44) // expect all counts in the overflow bucket + dp.BucketCounts().Append(0, 0, 0, 0, 44) // expect all counts in the overflow bucket // set explictbounds dp.ExplicitBounds().Append(1.0, 2.0, 3.0, 4.0, 5.0) @@ -647,7 +679,7 @@ func TestRandom_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(44, 0, 0, 0, 0, 0) // expect all counts in the 1st bucket + dp.BucketCounts().Append(44, 0, 0, 0, 0) // expect all counts in the 1st bucket // set 
explictbounds dp.ExplicitBounds().Append(1000.0, 2000.0, 3000.0, 4000.0, 5000.0) @@ -675,7 +707,7 @@ func TestRandom_convert_exponential_hist_to_explicit_hist(t *testing.T) { dp.Attributes().PutStr("metric_type", "timing") // set bucket counts - dp.BucketCounts().Append(0, 0, 0, 3, 3, 2, 7, 5, 4, 4, 16) + dp.BucketCounts().Append(0, 0, 3, 3, 2, 7, 5, 4, 4, 16) // set explictbounds dp.ExplicitBounds().Append(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0) From c05bf137ad898c1f7f3a78a41ae862438b58e022 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 13 Aug 2024 17:58:00 +0200 Subject: [PATCH 29/46] - updated algorithms, fixed minor issues with accuracy of conversion --- ...nvert_exponential_hist_to_explicit_hist.go | 356 +++++++++--------- 1 file changed, 181 insertions(+), 175 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index 3ea3d7c835e7..736deac47960 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -7,8 +7,8 @@ import ( "context" "fmt" "math" - "time" + "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "golang.org/x/exp/rand" @@ -22,15 +22,16 @@ type convertExponentialHistToExplicitHistArguments struct { } // distributionFnMap - map of conversion functions -var distributionFnMap = map[string]func(pmetric.ExponentialHistogramDataPoint, []float64) []uint64{ - "upper": calculateBucketCountsWithUpperBounds, - "midpoint": calculateBucketCountsWithMidpoint, - "random": calculateBucketCountsWithRandomDistribution, - "uniform": calculateBucketCountsWithUniformDistribution, +var distributionFnMap = map[string]distAlgorithm{ + "upper": upperAlgorithm, + "midpoint": 
midpointAlgorithm, + "random": randomAlgorithm, + "uniform": uniformAlgorithm, } func newconvertExponentialHistToExplicitHistFactory() ottl.Factory[ottlmetric.TransformContext] { - return ottl.NewFactory("convert_exponential_hist_to_explicit_hist", &convertExponentialHistToExplicitHistArguments{}, createconvertExponentialHistToExplicitHistFunction) + return ottl.NewFactory("convert_exponential_hist_to_explicit_hist", + &convertExponentialHistToExplicitHistArguments{}, createconvertExponentialHistToExplicitHistFunction) } func createconvertExponentialHistToExplicitHistFunction(_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[ottlmetric.TransformContext], error) { @@ -61,7 +62,7 @@ func convertExponentialHistToExplicitHist(distributionFn string, explicitBounds distFn, ok := distributionFnMap[distributionFn] if !ok { - return nil, fmt.Errorf("invalid conversion function: %s, must be one of [upper, midpoint, random, uniform]", distributionFn) + return nil, fmt.Errorf("invalid distribution alogrithm: %s, must be one of [upper, midpoint, random, uniform]", distributionFn) } return func(_ context.Context, tCtx ottlmetric.TransformContext) (any, error) { @@ -72,24 +73,25 @@ func convertExponentialHistToExplicitHist(distributionFn string, explicitBounds return nil, nil } - bucketedHist := pmetric.NewHistogram() + explicitHist := pmetric.NewHistogram() dps := metric.ExponentialHistogram().DataPoints() - bucketedHist.SetAggregationTemporality(metric.ExponentialHistogram().AggregationTemporality()) + explicitHist.SetAggregationTemporality(metric.ExponentialHistogram().AggregationTemporality()) // map over each exponential histogram data point and calculate the bucket counts for i := 0; i < dps.Len(); i++ { expDataPoint := dps.At(i) - bucketCounts := distFn(expDataPoint, explicitBounds) - bucketHistDatapoint := bucketedHist.DataPoints().AppendEmpty() - bucketHistDatapoint.SetStartTimestamp(expDataPoint.StartTimestamp()) - 
bucketHistDatapoint.SetTimestamp(expDataPoint.Timestamp()) - bucketHistDatapoint.SetCount(expDataPoint.Count()) - bucketHistDatapoint.SetSum(expDataPoint.Sum()) - bucketHistDatapoint.SetMin(expDataPoint.Min()) - bucketHistDatapoint.SetMax(expDataPoint.Max()) - bucketHistDatapoint.ExplicitBounds().FromRaw(explicitBounds) - bucketHistDatapoint.BucketCounts().FromRaw(bucketCounts) - expDataPoint.Attributes().CopyTo(bucketHistDatapoint.Attributes()) + bucketCounts := calculateBucketCounts(expDataPoint, explicitBounds, distFn) + explicitHistDp := explicitHist.DataPoints().AppendEmpty() + explicitHistDp.SetStartTimestamp(expDataPoint.StartTimestamp()) + explicitHistDp.SetTimestamp(expDataPoint.Timestamp()) + explicitHistDp.SetCount(expDataPoint.Count()) + explicitHistDp.SetSum(expDataPoint.Sum()) + explicitHistDp.SetMin(expDataPoint.Min()) + explicitHistDp.SetMax(expDataPoint.Max()) + expDataPoint.Exemplars().CopyTo(explicitHistDp.Exemplars()) + explicitHistDp.ExplicitBounds().FromRaw(explicitBounds) + explicitHistDp.BucketCounts().FromRaw(bucketCounts) + expDataPoint.Attributes().CopyTo(explicitHistDp.Attributes()) } // create new metric and override metric @@ -97,203 +99,207 @@ func convertExponentialHistToExplicitHist(distributionFn string, explicitBounds newMetric.SetName(metric.Name()) newMetric.SetDescription(metric.Description()) newMetric.SetUnit(metric.Unit()) - bucketedHist.CopyTo(newMetric.SetEmptyHistogram()) + explicitHist.CopyTo(newMetric.SetEmptyHistogram()) newMetric.CopyTo(metric) return nil, nil }, nil } -// calculateBucketCountsWithUpperBounds function calculates the bucket counts for a given exponential histogram data point. -// The algorithm is inspired by the logExponentialHistogramDataPoints function used to Print Exponential Histograms in Otel. 
-// found here: https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/internal/otlptext/databuffer.go#L144-L201 -// -// - factor is calculated as math.Ldexp(math.Ln2, -scale) -// -// - next we iterate the bucket counts and positions (pos) in the exponential histogram datapoint. -// -// - the index is calculated by adding the exponential offset to the positive bucket position (pos) -// -// - the factor is then used to calculate the upper bound of the bucket which is calculated as -// upper = math.Exp((index+1) * factor) -func calculateBucketCountsWithUpperBounds(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { +type distAlgorithm func(count uint64, upper, lower float64, boundaries []float64, bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) + +func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64, distFn distAlgorithm) []uint64 { scale := int(dp.Scale()) factor := math.Ldexp(math.Ln2, -scale) posB := dp.Positive().BucketCounts() - bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket + bucketCounts := make([]uint64, len(boundaries)) + + // add zerocount if boundary starts at zero + if zerocount := dp.ZeroCount(); zerocount > 0 && boundaries[0] == 0 { + bucketCounts[0] += zerocount + } +positionLoop: for pos := 0; pos < posB.Len(); pos++ { index := dp.Positive().Offset() + int32(pos) - // calculate the upper bound of the bucket upper := math.Exp(float64(index+1) * factor) + lower := math.Exp(float64(index) * factor) count := posB.At(pos) - // At this point we know that the upper bound represents the highest value that can be in this bucket, so we take the - // upper bound and compare it to each of the explicit boundaries provided by the user until we find a boundary - // that fits, that is, the first instance where upper bound <= explicit boundary. 
- for j, boundary := range boundaries { - if upper <= boundary { - bucketCounts[j] += count - break - } - if j == len(boundaries)-1 { - bucketCounts[j+1] += count // Overflow bucket + // check if lower and upper bounds are within the boundaries + for bIndex := 1; bIndex < len(boundaries); bIndex++ { + if lower > boundaries[bIndex-1] && upper <= boundaries[bIndex] { + bucketCounts[bIndex-1] += count + continue positionLoop } } + + // if the lower bound is greater than the last boundary, add the count to the overflow bucket + if lower > boundaries[len(boundaries)-1] { + bucketCounts[len(boundaries)-1] += count + continue + } + + distFn(count, upper, lower, boundaries, posB, &bucketCounts) } return bucketCounts } -// calculateBucketCountsWithMidpoint function calculates the bucket counts for a given exponential histogram data point. +// upperAlgorithm function calculates the bucket counts for a given exponential histogram data point. +// The algorithm is inspired by the logExponentialHistogramDataPoints function used to Print Exponential Histograms in Otel. +// found here: https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/internal/otlptext/databuffer.go#L144-L201 +// +// - factor is calculated as math.Ldexp(math.Ln2, -scale) +// +// - next we iterate the bucket counts and positions (pos) in the exponential histogram datapoint. 
+// +// - the index is calculated by adding the exponential offset to the positive bucket position (pos) +// +// - the factor is then used to calculate the upper bound of the bucket which is calculated as +// upper = math.Exp((index+1) * factor) +var upperAlgorithm distAlgorithm = func(count uint64, + upper, _ float64, boundaries []float64, + bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) { + // count := bucketCountsSrc.At(index) + + // At this point we know that the upper bound represents the highest value that can be in this bucket, so we take the + // upper bound and compare it to each of the explicit boundaries provided by the user until we find a boundary + // that fits, that is, the first instance where upper bound <= explicit boundary. + for j, boundary := range boundaries { + if upper <= boundary { + (*bucketCountsDst)[j] += count + return + } + } + (*bucketCountsDst)[len(boundaries)-1] += count // Overflow bucket +} + +// midpointAlgorithm calculates the bucket counts for a given exponential histogram data point. // This algorithm is similar to calculateBucketCountsWithUpperBounds, but instead of using the upper bound of the bucket // to determine the bucket, it uses the midpoint of the upper and lower bounds. // The midpoint is calculated as (upper + lower) / 2. 
-func calculateBucketCountsWithMidpoint(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { - scale := int(dp.Scale()) - factor := math.Ldexp(math.Ln2, -scale) - posB := dp.Positive().BucketCounts() - bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket - - for pos := 0; pos < posB.Len(); pos++ { - index := dp.Positive().Offset() + int32(pos) - upper := math.Exp(float64(index+1) * factor) - lower := math.Exp(float64(index) * factor) - midpoint := (upper + lower) / 2 - count := posB.At(pos) - - for j, boundary := range boundaries { - if midpoint <= boundary { - bucketCounts[j] += count - break - } - if j == len(boundaries)-1 { - bucketCounts[j+1] += count // Overflow bucket +var midpointAlgorithm distAlgorithm = func(count uint64, + upper, lower float64, boundaries []float64, + bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) { + midpoint := (upper + lower) / 2 + + for j, boundary := range boundaries { + if midpoint <= boundary { + if j > 0 { + (*bucketCountsDst)[j-1] += count + return } + (*bucketCountsDst)[j] += count + return } } - - return bucketCounts + (*bucketCountsDst)[len(boundaries)-1] += count // Overflow bucket } -// calculateBucketCountsWithUniformDistribution distributes counts from an exponential histogram data point into a set of linear boundaries using uniform distribution -func calculateBucketCountsWithUniformDistribution(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { - scale := int(dp.Scale()) - factor := math.Ldexp(math.Ln2, -scale) - posB := dp.Positive().BucketCounts() - bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket - - for pos := 0; pos < posB.Len(); pos++ { - index := dp.Positive().Offset() + int32(pos) - lower := math.Exp(float64(index) * factor) - upper := math.Exp(float64(index+1) * factor) - count := posB.At(pos) +// uniformAlgorithm distributes counts from a given set of bucket sounrces into a set of linear 
boundaries using uniform distribution +var uniformAlgorithm distAlgorithm = func(count uint64, + upper, lower float64, boundaries []float64, + bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) { - // Find the boundaries that intersect with the bucket range - var start, end int - for start = 0; start < len(boundaries); start++ { - if lower <= boundaries[start] { - break - } + // Find the boundaries that intersect with the bucket range + var start, end int + for start = 0; start < len(boundaries); start++ { + if lower <= boundaries[start] { + break } - for end = start; end < len(boundaries); end++ { - if upper <= boundaries[end] { - break - } + } + + for end = start; end < len(boundaries); end++ { + if upper <= boundaries[end] { + break } + } - // Distribute the count uniformly across the intersecting boundaries - if end > start { - countPerBoundary := count / uint64(end-start+1) - remainder := count % uint64(end-start+1) - - for j := start; j <= end; j++ { - bucketCounts[j] += countPerBoundary - if remainder > 0 { - bucketCounts[j]++ - remainder-- - } + // make sure end value does not exceed the length of the boundaries + if end > len(boundaries)-1 { + end = len(boundaries) - 1 + } + + // Distribute the count uniformly across the intersecting boundaries + if end > start { + countPerBoundary := count / uint64(end-start+1) + remainder := count % uint64(end-start+1) + + for j := start; j <= end; j++ { + (*bucketCountsDst)[j] += countPerBoundary + if remainder > 0 { + (*bucketCountsDst)[j]++ + remainder-- } - } else { - // Handle the case where the bucket range does not intersect with any boundaries - bucketCounts[start] += count } + } else { + // Handle the case where the bucket range does not intersect with any boundaries + (*bucketCountsDst)[start] += count } - - return bucketCounts } -// calculateBucketCountsWithRandomDistribution distributes counts from an exponential histogram data point into a set of linear boundaries using random distribution -func 
calculateBucketCountsWithRandomDistribution(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64) []uint64 { - rand.Seed(uint64(time.Now().UnixNano())) // Seed the random number generator - scale := int(dp.Scale()) - // factor is used to scale the exponential histogram - factor := math.Ldexp(math.Ln2, -scale) - posB := dp.Positive().BucketCounts() - bucketCounts := make([]uint64, len(boundaries)+1) // +1 for the overflow bucket - - for pos := 0; pos < posB.Len(); pos++ { - // Calculate the lower and upper bounds of the current bucket - index := dp.Positive().Offset() + int32(pos) - lower := math.Exp(float64(index) * factor) - upper := math.Exp(float64(index+1) * factor) - count := posB.At(pos) +// randomAlgorithm distributes counts from a given set of bucket sources into a set of linear boundaries using random distribution +var randomAlgorithm distAlgorithm = func(count uint64, + upper, lower float64, boundaries []float64, + bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) { + // Find the boundaries that intersect with the bucket range + start := 0 + for start < len(boundaries) && boundaries[start] < lower { + start++ + } + end := start + for end < len(boundaries) && boundaries[end] < upper { + end++ + } - // Find the boundaries that intersect with the bucket range - start := 0 - for start < len(boundaries) && boundaries[start] < lower { - start++ - } - end := start - for end < len(boundaries) && boundaries[end] < upper { - end++ - } + // make sure end value does not exceed the length of the boundaries + if end > len(boundaries)-1 { + end = len(boundaries) - 1 + } - // Randomly distribute the count across the intersecting boundaries - if end > start { - rangeWidth := upper - lower - totalAllocated := uint64(0) - - for j := start; j <= end; j++ { - var boundaryLower, boundaryUpper float64 - if j == 0 { - // For the first boundary, set the lower limit to the bucket's lower bound - boundaryLower = lower - } else { - // Otherwise, set it to the 
previous boundary - boundaryLower = boundaries[j-1] - } - if j == len(boundaries) { - // For the last boundary, set the upper limit to the bucket's upper bound - boundaryUpper = upper - } else { - // Otherwise, set it to the current boundary - boundaryUpper = boundaries[j] - } - - // Calculate the overlap width between the boundary range and the bucket range - overlapWidth := math.Min(boundaryUpper, upper) - math.Max(boundaryLower, lower) - // Proportionally allocate the count based on the overlap width - allocatedCount := uint64(float64(count) * (overlapWidth / rangeWidth)) - - // Randomly assign the counts to the boundaries - randomlyAllocatedCount := uint64(rand.Float64() * float64(allocatedCount)) - bucketCounts[j] += randomlyAllocatedCount - totalAllocated += randomlyAllocatedCount + // Randomly distribute the count across the intersecting boundaries + if end > start { + rangeWidth := upper - lower + totalAllocated := uint64(0) + + for j := start; j <= end; j++ { + var boundaryLower, boundaryUpper float64 + if j == 0 { + // For the first boundary, set the lower limit to the bucket's lower bound + boundaryLower = lower + } else { + // Otherwise, set it to the previous boundary + boundaryLower = boundaries[j-1] } - - // Distribute any remaining count - remainingCount := count - totalAllocated - for remainingCount > 0 { - randomBoundary := rand.Intn(end-start+1) + start - bucketCounts[randomBoundary]++ - remainingCount-- + if j == len(boundaries) { + // For the last boundary, set the upper limit to the bucket's upper bound + boundaryUpper = upper + } else { + // Otherwise, set it to the current boundary + boundaryUpper = boundaries[j] } - } else { - // If the bucket range does not intersect with any boundaries, assign the entire count to the start boundary - bucketCounts[start] += count + + // Calculate the overlap width between the boundary range and the bucket range + overlapWidth := math.Min(boundaryUpper, upper) - math.Max(boundaryLower, lower) + // 
Proportionally allocate the count based on the overlap width + allocatedCount := uint64(float64(count) * (overlapWidth / rangeWidth)) + + // Randomly assign the counts to the boundaries + randomlyAllocatedCount := uint64(rand.Float64() * float64(allocatedCount)) + (*bucketCountsDst)[j] += randomlyAllocatedCount + totalAllocated += randomlyAllocatedCount } - } - return bucketCounts + // Distribute any remaining count + remainingCount := count - totalAllocated + for remainingCount > 0 { + randomBoundary := rand.Intn(end-start+1) + start + (*bucketCountsDst)[randomBoundary]++ + remainingCount-- + } + } else { + // If the bucket range does not intersect with any boundaries, assign the entire count to the start boundary + (*bucketCountsDst)[start] += count + } } From 7448835886a49092062d676fce08a0242cf60442 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 14 Aug 2024 00:46:26 +0200 Subject: [PATCH 30/46] updated readme --- processor/transformprocessor/README.md | 59 ++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index d472b0423213..5bca87ddb15d 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -366,29 +366,72 @@ The `convert_exponential_hist_to_explicit_hist` function converts an Exponential This function requires 2 arguments: -- `distribution` - This argument defines the convertion algorithm used to distribute the exponential datapoints into a new Explicit Histogram. There are 4 distribution options: +- `distribution` - This argument defines the distribution algorithm used to allocate the exponential histogram datapoints into a new Explicit Histogram. There are 4 options:
- __upper__ - This approach identifies the highest possible value of each exponential bucket (_the upper bound_) and uses it to distribute the datapoints by comparing the upper bound of each bucket with the ExplicitBounds provided. This approach works better for small/narrow exponential histograms where the difference between the upper bounds and lower bounds are small. + + _For example, Given:_ + 1. count = 10 + 2. Boundaries: [5, 10, 15, 20, 25] + 3. Upper Bound: 15 + _Process:_ + 4. Start with zeros: [0, 0, 0, 0, 0] + 5. Iterate the boundaries and compare $upper = 15$ with each boundary: + - $15>5$ (_skip_) + - $15>10$ (_skip_) + - $15<=15$ (allocate count to this boundary) + 6. Allocate count: [0, 0, __10__, 0, 0] + 7. Final Counts: [0, 0, __10__, 0, 0]
- __midpoint__ - This approach works in a similar way to the __upper__ approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. The midpoint is identified by calculationg the average of the upper and lower bounds. This approach also works better for small/narrow exponential histograms.
- - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the overlapping __ExplicitBounds__. This approach works better for large/wide exponential histograms. + + >The __uniform__ and __random__ distribution algorithms both utilise the concept of intersecting boundaries. + Intersecting boundaries are any boundary in the `boundaries array` that falls between or on the lower and upper values of the Exponential Histogram bounderies. + _For Example:_ if you have an Exponential Histogram bucket with a lower bound of 10 and upper of 20, and your boundaries array is [5, 10, 15, 20, 25], the intersecting boundaries are 10, 15, and 20 because they lie within the range [10, 20]. +
+ - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the intersecting __ExplicitBounds__. The alogrithm works as follows: + + - If there are valid intersecting boundaries, the function evenly distributes the count across these boundaries. + - Calculate the count to be allocated to each boundary. + - If there is a remainder after dividing the count equally, it distributes the remainder by incrementing the count for some of the boundaries until the remainder is exhausted. + + _For example Given:_ + 1. count = 10 + 2. Exponential Histogram Bounds: [10, 20] + 3. Boundaries: [5, 10, 15, 20, 25] + 4. Intersecting Boundaries: [10, 15, 20] + 5. Number of Intersecting Boundaries: 3 + 6. Using the formula: $count/numOfIntersections=10/3=3r1$ + _Uniform Allocation:_ + 7. Start with zeros: [0, 0, 0, 0, 0] + 8. Allocate 3 to each: [0, 3, 3, 3, 0] + 9. Distribute remainder $r$ 1: [0, 4, 3, 3, 0] + 10. Final Counts: [0, 4, 3, 3, 0]
- - __random__ - This approach distributes the datapoints for each bucket randomly across the overlapping __ExplicitBounds__. This approach works better for large/wide exponential histograms. + - __random__ - This approach distributes the datapoints for each bucket randomly across the intersecting __ExplicitBounds__. This approach works in a similar manner to the uniform distribution algorithm with the main difference being that points are distributed randomly instead of uniformly. This works as follows: + - If there are valid intersecting boundaries, calculate the proportion of the count that should be allocated to each boundary based on the overlap of the boundary with the provided range (lower to upper). + - For each boundary, a random fraction of the calculated proportion is allocated. + - Any remaining count (_due to rounding or random distribution_) is then distributed randomly among the intersecting boundaries. + - If the bucket range does not intersect with any boundaries, the entire count is assigned to the start boundary.
- `ExplicitBounds` represents the list of bucket boundaries for the new histogram. This argument is __required__ and __cannot be empty__. -__WARNING:__ +__WARNINGS:__ + +- The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries and identify the best distribution approach for your data in order to minimize this loss. + + For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. -The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries and identify the best distribution approach for your data in order to minimize this loss. +- __Negative Bucket Counts__ are not supported in Explicit Histograms, as such negative bucket count are ignored. -For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. +- __ZeroCounts__ are only allocated if the ExplicitBounds array contains a zero boundary. That is, if the Explicit Boundaries that you provide does not start with `0`, the function will not allocate any zero counts from the Exponential Histogram. This function should only be used when Exponential Histograms are not suitable for the downstream consumers or if upstream metric sources are unable to generate Explicit Histograms. 
-Example: +__Example__: -- `convert_exponential_hist_to_explicit_hist("random", [10.0, 100.0, 1000.0, 10000.0])` +- `convert_exponential_hist_to_explicit_hist("random", [0.0, 10.0, 100.0, 1000.0, 10000.0])` ### scale_metric From be61c0d644bc8451906386ca8789a84a060833b5 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 14 Aug 2024 00:49:14 +0200 Subject: [PATCH 31/46] Update processor/transformprocessor/README.md Co-authored-by: Kent Quirk --- processor/transformprocessor/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 5bca87ddb15d..2a7a051690a9 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -383,7 +383,7 @@ This function requires 2 arguments: 6. Allocate count: [0, 0, __10__, 0, 0] 7. Final Counts: [0, 0, __10__, 0, 0]
- - __midpoint__ - This approach works in a similar way to the __upper__ approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. The midpoint is identified by calculationg the average of the upper and lower bounds. This approach also works better for small/narrow exponential histograms. + - __midpoint__ - This approach works in a similar way to the __upper__ approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. The midpoint is identified by calculating the average of the upper and lower bounds. This approach also works better for small/narrow exponential histograms.
>The __uniform__ and __random__ distribution algorithms both utilise the concept of intersecting boundaries. From 3f57f3612009f2535bceaa645e37830ceb2bb327 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 3 Sep 2024 19:06:39 +0200 Subject: [PATCH 32/46] Update processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go fix typo Co-authored-by: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> --- .../metrics/func_convert_exponential_hist_to_explicit_hist.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index 736deac47960..3faf9e605c76 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -62,7 +62,7 @@ func convertExponentialHistToExplicitHist(distributionFn string, explicitBounds distFn, ok := distributionFnMap[distributionFn] if !ok { - return nil, fmt.Errorf("invalid distribution alogrithm: %s, must be one of [upper, midpoint, random, uniform]", distributionFn) + return nil, fmt.Errorf("invalid distribution algorithm: %s, must be one of [upper, midpoint, random, uniform]", distributionFn) } return func(_ context.Context, tCtx ottlmetric.TransformContext) (any, error) { From c19b23d6f3c5d927e25c809ec281eea6311d12a1 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 3 Sep 2024 19:56:50 +0200 Subject: [PATCH 33/46] update function name and add warning message --- processor/transformprocessor/README.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 2a7a051690a9..25cbe3b431c7 100644 --- 
a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -220,7 +220,7 @@ In addition to OTTL functions, the processor defines its own functions to help w - [copy_metric](#copy_metric) - [scale_metric](#scale_metric) - [aggregate_on_attributes](#aggregate_on_attributes) -- [convert_exponential_hist_to_explicit_hist](#convert_exponential_hist_to_explicit_hist) +- [convert_exponential_histogram_to_explicit_histogram](#convert_exponential_histogram_to_explicit_histogram) ### convert_sum_to_gauge @@ -358,11 +358,13 @@ Examples: - `copy_metric(desc="new desc") where description == "old desc"` -### convert_exponential_hist_to_explicit_hist +### convert_exponential_histogram_to_explicit_histogram -`convert_exponential_hist_to_explicit_hist(distribution, [ExplicitBounds])` +__Warning:__ The approach used in this function to convert exponential histograms to explicit histograms __is not__ part of the __OpenTelemetry Specification__. -The `convert_exponential_hist_to_explicit_hist` function converts an ExponentialHistogram to an Explicit (_normal_) Histogram. +`convert_exponential_histogram_to_explicit_histogram(distribution, [ExplicitBounds])` + +The `convert_exponential_histogram_to_explicit_histogram` function converts an ExponentialHistogram to an Explicit (_normal_) Histogram. This function requires 2 arguments: @@ -423,7 +425,7 @@ __WARNINGS:__ For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. -- __Negative Bucket Counts__ are not supported in Explicit Histograms, as such negative bucket count are ignored. +- __Negative Bucket Counts__ are not supported in Explicit Histograms, as such negative bucket counts are ignored. - __ZeroCounts__ are only allocated if the ExplicitBounds array contains a zero boundary. 
That is, if the Explicit Boundaries that you provide does not start with `0`, the function will not allocate any zero counts from the Exponential Histogram. @@ -431,7 +433,7 @@ This function should only be used when Exponential Histograms are not suitable f __Example__: -- `convert_exponential_hist_to_explicit_hist("random", [0.0, 10.0, 100.0, 1000.0, 10000.0])` +- `convert_exponential_histogram_to_explicit_histogram("random", [0.0, 10.0, 100.0, 1000.0, 10000.0])` ### scale_metric From 33b636e9bee1b2d6192d4d09167f9442500ad596 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 3 Sep 2024 19:57:17 +0200 Subject: [PATCH 34/46] fixed function name --- processor/transformprocessor/internal/metrics/functions_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/functions_test.go b/processor/transformprocessor/internal/metrics/functions_test.go index 8826cf8a3a4f..d36f5b9199bd 100644 --- a/processor/transformprocessor/internal/metrics/functions_test.go +++ b/processor/transformprocessor/internal/metrics/functions_test.go @@ -66,7 +66,7 @@ func Test_MetricFunctions(t *testing.T) { expected["extract_count_metric"] = newExtractCountMetricFactory() expected["copy_metric"] = newCopyMetricFactory() expected["scale_metric"] = newScaleMetricFactory() - expected["convert_exponential_hist_to_explicit_hist"] = newconvertExponentialHistToExplicitHistFactory() + expected["convert_exponential_histogram_to_explicit_histogram"] = newconvertExponentialHistToExplicitHistFactory() actual := MetricFunctions() require.Equal(t, len(expected), len(actual)) From 7b5447f14f34e8db44ae153e39d332fce5ca716e Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 3 Sep 2024 19:58:48 +0200 Subject: [PATCH 35/46] remove GOTO from distFn loop and changed transform function name --- ...nvert_exponential_hist_to_explicit_hist.go | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git 
a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index 3faf9e605c76..570a86b7f3b0 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -30,7 +30,7 @@ var distributionFnMap = map[string]distAlgorithm{ } func newconvertExponentialHistToExplicitHistFactory() ottl.Factory[ottlmetric.TransformContext] { - return ottl.NewFactory("convert_exponential_hist_to_explicit_hist", + return ottl.NewFactory("convert_exponential_histogram_to_explicit_histogram", &convertExponentialHistToExplicitHistArguments{}, createconvertExponentialHistToExplicitHistFunction) } @@ -119,28 +119,31 @@ func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries bucketCounts[0] += zerocount } -positionLoop: for pos := 0; pos < posB.Len(); pos++ { index := dp.Positive().Offset() + int32(pos) upper := math.Exp(float64(index+1) * factor) lower := math.Exp(float64(index) * factor) count := posB.At(pos) + runDistFn := true + + // if the lower bound is greater than the last boundary, add the count to the overflow bucket + if lower > boundaries[len(boundaries)-1] { + bucketCounts[len(boundaries)-1] += count + continue + } // check if lower and upper bounds are within the boundaries for bIndex := 1; bIndex < len(boundaries); bIndex++ { if lower > boundaries[bIndex-1] && upper <= boundaries[bIndex] { bucketCounts[bIndex-1] += count - continue positionLoop + runDistFn = false + break } } - // if the lower bound is greater than the last boundary, add the count to the overflow bucket - if lower > boundaries[len(boundaries)-1] { - bucketCounts[len(boundaries)-1] += count - continue + if runDistFn { + distFn(count, upper, lower, boundaries, posB, &bucketCounts) } - - distFn(count, upper, 
lower, boundaries, posB, &bucketCounts) } return bucketCounts From d080b9e0fd5ab2b945b844379ae29fc0df2728b8 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 3 Sep 2024 21:07:41 +0200 Subject: [PATCH 36/46] adjust function name --- processor/transformprocessor/README.md | 10 +++++----- .../func_convert_exponential_hist_to_explicit_hist.go | 2 +- .../internal/metrics/functions_test.go | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 25cbe3b431c7..eb9b863c0ba2 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -220,7 +220,7 @@ In addition to OTTL functions, the processor defines its own functions to help w - [copy_metric](#copy_metric) - [scale_metric](#scale_metric) - [aggregate_on_attributes](#aggregate_on_attributes) -- [convert_exponential_histogram_to_explicit_histogram](#convert_exponential_histogram_to_explicit_histogram) +- [convert_exponential_histogram_to_histogram](#convert_exponential_histogram_to_histogram) ### convert_sum_to_gauge @@ -358,13 +358,13 @@ Examples: - `copy_metric(desc="new desc") where description == "old desc"` -### convert_exponential_histogram_to_explicit_histogram +### convert_exponential_histogram_to_histogram __Warning:__ The approach used in this function to convert exponential histograms to explicit histograms __is not__ part of the __OpenTelemetry Specification__. -`convert_exponential_histogram_to_explicit_histogram(distribution, [ExplicitBounds])` +`convert_exponential_histogram_to_histogram(distribution, [ExplicitBounds])` -The `convert_exponential_histogram_to_explicit_histogram` function converts an ExponentialHistogram to an Explicit (_normal_) Histogram. +The `convert_exponential_histogram_to_histogram` function converts an ExponentialHistogram to an Explicit (_normal_) Histogram. 
This function requires 2 arguments: @@ -433,7 +433,7 @@ This function should only be used when Exponential Histograms are not suitable f __Example__: -- `convert_exponential_histogram_to_explicit_histogram("random", [0.0, 10.0, 100.0, 1000.0, 10000.0])` +- `convert_exponential_histogram_to_histogram("random", [0.0, 10.0, 100.0, 1000.0, 10000.0])` ### scale_metric diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index 570a86b7f3b0..2aa53c7de106 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -30,7 +30,7 @@ var distributionFnMap = map[string]distAlgorithm{ } func newconvertExponentialHistToExplicitHistFactory() ottl.Factory[ottlmetric.TransformContext] { - return ottl.NewFactory("convert_exponential_histogram_to_explicit_histogram", + return ottl.NewFactory("convert_exponential_histogram_to_histogram", &convertExponentialHistToExplicitHistArguments{}, createconvertExponentialHistToExplicitHistFunction) } diff --git a/processor/transformprocessor/internal/metrics/functions_test.go b/processor/transformprocessor/internal/metrics/functions_test.go index d36f5b9199bd..24cd4db2d6ad 100644 --- a/processor/transformprocessor/internal/metrics/functions_test.go +++ b/processor/transformprocessor/internal/metrics/functions_test.go @@ -66,7 +66,7 @@ func Test_MetricFunctions(t *testing.T) { expected["extract_count_metric"] = newExtractCountMetricFactory() expected["copy_metric"] = newCopyMetricFactory() expected["scale_metric"] = newScaleMetricFactory() - expected["convert_exponential_histogram_to_explicit_histogram"] = newconvertExponentialHistToExplicitHistFactory() + expected["convert_exponential_histogram_to_histogram"] = 
newconvertExponentialHistToExplicitHistFactory() actual := MetricFunctions() require.Equal(t, len(expected), len(actual)) From 456a4641b7ff0e4c8f7165bcaf409ee2edb1fb82 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 3 Sep 2024 22:59:54 +0200 Subject: [PATCH 37/46] fix typos --- processor/transformprocessor/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index eb9b863c0ba2..d9f6476970d5 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -389,10 +389,10 @@ This function requires 2 arguments:
>The __uniform__ and __random__ distribution algorithms both utilise the concept of intersecting boundaries. - Intersecting boundaries are any boundary in the `boundaries array` that falls between or on the lower and upper values of the Exponential Histogram bounderies. + Intersecting boundaries are any boundary in the `boundaries array` that falls between or on the lower and upper values of the Exponential Histogram boundaries. _For Example:_ if you have an Exponential Histogram bucket with a lower bound of 10 and upper of 20, and your boundaries array is [5, 10, 15, 20, 25], the intersecting boundaries are 10, 15, and 20 because they lie within the range [10, 20].
- - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the intersecting __ExplicitBounds__. The alogrithm works as follows: + - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the intersecting __ExplicitBounds__. The algorithm works as follows: - If there are valid intersecting boundaries, the function evenly distributes the count across these boundaries. - Calculate the count to be allocated to each boundary. From 3039e9077dc7542f6e9d54b5022feb65a068abe5 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Thu, 5 Sep 2024 11:36:26 +0200 Subject: [PATCH 38/46] fix linting issues --- ...c_convert_exponential_hist_to_explicit_hist.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go index 2aa53c7de106..9472cdf3e99f 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist.go @@ -1,14 +1,13 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package metrics +package metrics // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/metrics" import ( "context" "fmt" "math" - "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "golang.org/x/exp/rand" @@ -106,7 +105,7 @@ func convertExponentialHistToExplicitHist(distributionFn string, explicitBounds }, nil } -type distAlgorithm func(count uint64, upper, lower float64, boundaries []float64, bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) +type distAlgorithm func(count uint64, upper, lower float64, boundaries []float64, bucketCountsDst *[]uint64) 
func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries []float64, distFn distAlgorithm) []uint64 { scale := int(dp.Scale()) @@ -142,7 +141,7 @@ func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries } if runDistFn { - distFn(count, upper, lower, boundaries, posB, &bucketCounts) + distFn(count, upper, lower, boundaries, &bucketCounts) } } @@ -163,7 +162,7 @@ func calculateBucketCounts(dp pmetric.ExponentialHistogramDataPoint, boundaries // upper = math.Exp((index+1) * factor) var upperAlgorithm distAlgorithm = func(count uint64, upper, _ float64, boundaries []float64, - bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) { + bucketCountsDst *[]uint64) { // count := bucketCountsSrc.At(index) // At this point we know that the upper bound represents the highest value that can be in this bucket, so we take the @@ -184,7 +183,7 @@ var upperAlgorithm distAlgorithm = func(count uint64, // The midpoint is calculated as (upper + lower) / 2. 
var midpointAlgorithm distAlgorithm = func(count uint64, upper, lower float64, boundaries []float64, - bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) { + bucketCountsDst *[]uint64) { midpoint := (upper + lower) / 2 for j, boundary := range boundaries { @@ -203,7 +202,7 @@ var midpointAlgorithm distAlgorithm = func(count uint64, // uniformAlgorithm distributes counts from a given set of bucket sounrces into a set of linear boundaries using uniform distribution var uniformAlgorithm distAlgorithm = func(count uint64, upper, lower float64, boundaries []float64, - bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) { + bucketCountsDst *[]uint64) { // Find the boundaries that intersect with the bucket range var start, end int @@ -245,7 +244,7 @@ var uniformAlgorithm distAlgorithm = func(count uint64, // randomAlgorithm distributes counts from a given set of bucket sources into a set of linear boundaries using random distribution var randomAlgorithm distAlgorithm = func(count uint64, upper, lower float64, boundaries []float64, - bucketCountsSrc pcommon.UInt64Slice, bucketCountsDst *[]uint64) { + bucketCountsDst *[]uint64) { // Find the boundaries that intersect with the bucket range start := 0 for start < len(boundaries) && boundaries[start] < lower { From 71db112c283f64a5c4415eb228675baa26e7d88d Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Mon, 16 Sep 2024 19:04:53 +0200 Subject: [PATCH 39/46] go mod tidy on transformprocessor --- processor/transformprocessor/go.mod | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/processor/transformprocessor/go.mod b/processor/transformprocessor/go.mod index c3d2726c21bf..12a79675d1a9 100644 --- a/processor/transformprocessor/go.mod +++ b/processor/transformprocessor/go.mod @@ -25,7 +25,10 @@ require ( go.uber.org/zap v1.27.0 ) -require go.opentelemetry.io/collector/consumer/consumertest v0.109.1-0.20240911162712-6c2697c4453c +require ( + 
go.opentelemetry.io/collector/consumer/consumertest v0.109.1-0.20240911162712-6c2697c4453c + golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 +) require ( github.com/alecthomas/participle/v2 v2.1.1 // indirect @@ -73,7 +76,6 @@ require ( go.opentelemetry.io/otel/exporters/prometheus v0.52.0 // indirect go.opentelemetry.io/otel/sdk v1.30.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.30.0 // indirect - golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect golang.org/x/net v0.29.0 // indirect golang.org/x/sys v0.25.0 // indirect golang.org/x/text v0.18.0 // indirect From f3a98d3a792530d48cc3c95f84e720a34fd8321f Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Tue, 17 Sep 2024 22:03:47 +0200 Subject: [PATCH 40/46] go mod tidy --- processor/transformprocessor/go.mod | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/processor/transformprocessor/go.mod b/processor/transformprocessor/go.mod index 2bceed3e77b1..78e08cad8146 100644 --- a/processor/transformprocessor/go.mod +++ b/processor/transformprocessor/go.mod @@ -25,8 +25,10 @@ require ( go.uber.org/zap v1.27.0 ) -require go.opentelemetry.io/collector/consumer/consumertest v0.109.1-0.20240916171416-d3a171889c53 - +require ( + go.opentelemetry.io/collector/consumer/consumertest v0.109.1-0.20240916171416-d3a171889c53 + golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 +) require ( github.com/alecthomas/participle/v2 v2.1.1 // indirect From 3fac936cba27b536024d219bb383f7b899a1d83b Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 18 Sep 2024 15:01:22 +0200 Subject: [PATCH 41/46] fixed linting issue --- .../func_convert_exponential_hist_to_explicit_hist_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index 4f16562f8ea0..3e11f2ff7e40 100644 --- 
a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -757,7 +757,7 @@ func TestRandom_convert_exponential_hist_to_explicit_hist(t *testing.T) { // since the max value in the exponential histogram is 245 // we can assert that the overflow bucket has a count > 0 overflow := dp.BucketCounts().At(dp.BucketCounts().Len() - 1) - assert.Greater(t, overflow, uint64(0), "overflow bucket count should be > 0") + assert.Positive(t, overflow, uint64(0), "overflow bucket count should be > 0") return } From 6e59f0ac78000995a718b2dd28ebc09868afd3fb Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 18 Sep 2024 16:30:37 +0200 Subject: [PATCH 42/46] Update processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go Co-authored-by: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> --- .../func_convert_exponential_hist_to_explicit_hist_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index 3e11f2ff7e40..e11875674c9a 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -8,10 +8,11 @@ import ( "testing" "time" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" ) var nonExponentialHist = func() pmetric.Metric { From 
c1fa8eacde48c1e99e92ad5c43480e4c385dcc37 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Wed, 18 Sep 2024 19:55:00 +0200 Subject: [PATCH 43/46] Update processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go Co-authored-by: Tyler Helmuth <12352919+TylerHelmuth@users.noreply.github.com> --- .../func_convert_exponential_hist_to_explicit_hist_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index e11875674c9a..8bcc90bfd361 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -11,7 +11,6 @@ import ( "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" ) From 5bb3c30db70c9bafe0361d18272a285af17b2bbd Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Thu, 19 Sep 2024 00:05:36 +0100 Subject: [PATCH 44/46] go fmt --- .../func_convert_exponential_hist_to_explicit_hist_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index 8bcc90bfd361..3e11f2ff7e40 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -8,10 +8,10 @@ import ( "testing" "time" + 
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" ) var nonExponentialHist = func() pmetric.Metric { From 614dcad8475d529fad9f439d1983edfe612a0ad5 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Fri, 20 Sep 2024 15:40:57 +0100 Subject: [PATCH 45/46] gci-ed --- .../func_convert_exponential_hist_to_explicit_hist_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go index 3e11f2ff7e40..1dd76ad4f097 100644 --- a/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go +++ b/processor/transformprocessor/internal/metrics/func_convert_exponential_hist_to_explicit_hist_test.go @@ -8,10 +8,11 @@ import ( "testing" "time" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottlmetric" ) var nonExponentialHist = func() pmetric.Metric { From 5870ff45f4d5a36614c49a51a0ec630c01b76565 Mon Sep 17 00:00:00 2001 From: Shaun Remekie Date: Fri, 20 Sep 2024 23:32:31 +0100 Subject: [PATCH 46/46] make crosslink --- processor/transformprocessor/go.mod | 1 - 1 file changed, 1 deletion(-) diff --git a/processor/transformprocessor/go.mod b/processor/transformprocessor/go.mod index 9c3fea828e11..19629b783abf 100644 --- a/processor/transformprocessor/go.mod +++ b/processor/transformprocessor/go.mod @@ -25,7 +25,6 @@ require ( 
go.uber.org/zap v1.27.0 ) - require ( go.opentelemetry.io/collector/consumer/consumertest v0.109.1-0.20240918193345-a3c0565031b0 golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842