Skip to content

Commit

Permalink
[OTEL-2348] Improve DDSketch to Sketch conversion (#468)
Browse files Browse the repository at this point in the history
* Improve stability of DDSketch to Sketch conversion

* Add changelog entry
  • Loading branch information
jade-guiton-dd authored Jan 23, 2025
1 parent 7a80ef8 commit 30dbd89
Show file tree
Hide file tree
Showing 13 changed files with 95 additions and 95 deletions.
16 changes: 16 additions & 0 deletions .chloggen/improve-sketch-conversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component (e.g. pkg/quantile)
component: pkg/quantile

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Improve stability of OTel histogram conversion

# The PR related to this change
issues: [468]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
2 changes: 1 addition & 1 deletion pkg/otlp/metrics/sketches_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ func TestKnownDistributionsQuantile(t *testing.T) {
startTime := timeNow.Add(-10 * time.Second)
name := "example.histo"
const (
N uint64 = 1_000
N uint64 = 2_000
M uint64 = 100
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
40 changes: 10 additions & 30 deletions pkg/quantile/ddsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,39 +51,24 @@ type floatKeyCount struct {

// convertFloatCountsToIntCounts converts a list of float counts to integer counts,
// preserving the total count of the list by tracking leftover decimal counts.
// TODO: this tends to shift sketches towards the right (since the leftover counts
// get added to the rightmost bucket). This could be improved by adding leftover
// counts to the key that's the weighted average of keys that contribute to that leftover count.
func convertFloatCountsToIntCounts(floatKeyCounts []floatKeyCount) []KeyCount {
func convertFloatCountsToIntCounts(floatKeyCounts []floatKeyCount) ([]KeyCount, uint) {
keyCounts := make([]KeyCount, 0, len(floatKeyCounts))

sort.Slice(floatKeyCounts, func(i, j int) bool {
return floatKeyCounts[i].k < floatKeyCounts[j].k
})

leftoverCount := 0.0
floatTotal := 0.0
intTotal := uint(0)
for _, fkc := range floatKeyCounts {
key := fkc.k
count := fkc.c

// Add leftovers from previous bucket, and compute leftovers
// for the next bucket
count += leftoverCount
uintCount := uint(count)
leftoverCount = count - float64(uintCount)

keyCounts = append(keyCounts, KeyCount{k: Key(key), n: uintCount})
floatTotal += fkc.c
rounded := uint(math.Round(floatTotal)) - intTotal
intTotal += rounded
// At this point, intTotal == Round(floatTotal)
keyCounts = append(keyCounts, KeyCount{k: Key(fkc.k), n: rounded})
}

// Edge case where there may be some leftover count because the total count
// isn't an int (or due to float64 precision errors). In this case, round to
// nearest.
if leftoverCount >= 0.5 {
lastIndex := len(keyCounts) - 1
keyCounts[lastIndex] = KeyCount{k: keyCounts[lastIndex].k, n: keyCounts[lastIndex].n + 1}
}

return keyCounts
return keyCounts, intTotal
}

// convertDDSketchIntoSketch takes a DDSketch and moves its data to a Sketch.
Expand Down Expand Up @@ -155,19 +140,14 @@ func convertDDSketchIntoSketch(c *Config, inputSketch *ddsketch.DDSketch) (*Sket
floatKeyCounts = append(floatKeyCounts, floatKeyCount{k: 0, c: zeroes})

// Generate the integer KeyCount objects from the counts we retrieved
keyCounts := convertFloatCountsToIntCounts(floatKeyCounts)
keyCounts, cnt := convertFloatCountsToIntCounts(floatKeyCounts)

// Populate sparseStore object with the collected keyCounts
// insertCounts will take care of creating multiple uint16 bins for a
// single key if the count overflows uint16
sparseStore.insertCounts(c, keyCounts)

// Create summary object
// Calculate the total count that was inserted in the Sketch
var cnt uint
for _, v := range keyCounts {
cnt += v.n
}
sum := inputSketch.GetSum()
avg := sum / float64(cnt)
max, err := inputSketch.GetMaxValue()
Expand Down
42 changes: 23 additions & 19 deletions pkg/quantile/ddsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func TestCreateDDSketchWithSketchMapping(t *testing.T) {
quantile: sketchtest.UQuadraticQ(-N, 0),
},
{
name: "U-quadratic distribution (a=-N,b=N)",
name: "U-quadratic distribution (a=-N/2,b=N/2)",
quantile: sketchtest.UQuadraticQ(-N/2, N/2),
},
{
Expand Down Expand Up @@ -196,13 +196,9 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
name: "Uniform distribution (a=0,b=N)",
quantile: sketchtest.UniformQ(0, N),
},
// The p99 for this test fails, likely due to the shift of leftover bucket counts the right that is performed
// during the DDSketch -> Sketch conversion, causing the p99 of the output sketch to fall on 0
// (which means the InEpsilon check returns 1).
{
name: "Uniform distribution (a=-N,b=0)",
quantile: sketchtest.UniformQ(-N, 0),
excludedQuantiles: map[int]bool{99: true},
name: "Uniform distribution (a=-N,b=0)",
quantile: sketchtest.UniformQ(-N, 0),
},
{
name: "Uniform distribution (a=-N,b=N)",
Expand All @@ -213,18 +209,16 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
quantile: sketchtest.UQuadraticQ(0, N),
},
{
name: "U-quadratic distribution (a=-N,b=N)",
name: "U-quadratic distribution (a=-N/2,b=N/2)",
quantile: sketchtest.UQuadraticQ(-N/2, N/2),
},
{
name: "U-quadratic distribution (a=-N,b=N)",
name: "U-quadratic distribution (a=-N,b=0)",
quantile: sketchtest.UQuadraticQ(-N, 0),
},
// Same as above, p99 fails.
{
name: "Truncated Exponential distribution (a=0,b=N,lambda=1/100)",
quantile: sketchtest.TruncateQ(0, N, sketchtest.ExponentialQ(1.0/100), sketchtest.ExponentialCDF(1.0/100)),
excludedQuantiles: map[int]bool{99: true},
name: "Truncated Exponential distribution (a=0,b=N,lambda=1/100)",
quantile: sketchtest.TruncateQ(0, N, sketchtest.ExponentialQ(1.0/100), sketchtest.ExponentialCDF(1.0/100)),
},
{
name: "Truncated Normal distribution (a=0,b=8,mu=0, sigma=1e-3)",
Expand Down Expand Up @@ -280,12 +274,22 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
outputSketch, err := convertDDSketchIntoSketch(sketchConfig, convertedSketch)
require.NoError(t, err)

// Conversion accuracy formula taken from:
// https://github.com/DataDog/logs-backend/blob/895e56c9eefa1c28a3affbdd0027f58a4c6f4322/domains/event-store/libs/event-store-aggregate/src/test/java/com/dd/event/store/api/query/sketch/SketchTest.java#L409-L422
inputGamma := (1.0 + convertedSketch.RelativeAccuracy()) / (1.0 - convertedSketch.RelativeAccuracy())
outputGamma := sketchConfig.gamma.v
conversionGamma := inputGamma * outputGamma * outputGamma
conversionRelativeAccuracy := (conversionGamma - 1) / (conversionGamma + 1)
/* We compute the expected bound on the relative error between percentile values before
* and after the conversion.
*
* - The +0.5 in `createDDSketchWithSketchMapping` compensates for a bug in
* `sketchConfig.key` which is not relevant here, creating a systematic bias of half
* a bin.
* - The error on accumulated bin counts caused by the rounding process is bounded by 0.5
* by design. In most circumstances, including the scenarios under test, this can
* shift quantiles by one bin at most.
* - Finally, a bug in the interpolation in `Sketch.Quantile` means the output can be
* off by another half a bin.
* (Unfortunately, other tests, as well as backend code, rely on these bugs, so they are
* not easily fixed.)
* In total, the expected worst case relative error:
*/
conversionRelativeAccuracy := sketchConfig.gamma.v*sketchConfig.gamma.v - 1

// Check the count of the output sketch
assert.InDelta(
Expand Down

0 comments on commit 30dbd89

Please sign in to comment.