diff --git a/.chloggen/sampling-improvements.yaml b/.chloggen/sampling-improvements.yaml new file mode 100644 index 000000000000..e28b5eaa88f1 --- /dev/null +++ b/.chloggen/sampling-improvements.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/sampling + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Usability improvements in the sampling API. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [31918] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [api] diff --git a/pkg/sampling/doc.go b/pkg/sampling/doc.go index 4b23f1fb85ba..0c5b1052bbc5 100644 --- a/pkg/sampling/doc.go +++ b/pkg/sampling/doc.go @@ -37,53 +37,58 @@ // conditioned on how much sampling has already taken place, use the // following pseudocode. // -// func Setup() { -// // Get a fixed probability value from the configuration, in -// // the range (0, 1]. -// probability := *FLAG_probability +// func Setup() { +// // Get a fixed probability value from the configuration, in +// // the range (0, 1]. +// probability := *FLAG_probability // -// // Calculate the sampling threshold from probability using 3 -// // hex digits of precision. -// fixedThreshold, err = ProbabilityToThresholdWithPrecision(probability, 3) -// if err != nil { -// // error case: Probability is not valid. -// } -// } +// // Calculate the sampling threshold from probability using 3 +// // hex digits of precision. +// fixedThreshold, err = ProbabilityToThresholdWithPrecision(probability, 3) +// if err != nil { +// // error case: Probability is not valid. +// } +// } // -// func MakeDecision(tracestate string, tid TraceID) bool { -// // Parse the incoming tracestate -// ts, err := NewW3CTraceState(tracestate) -// if err != nil { -// // error case: Tracestate is ill-formed. -// } -// // For an absolute probability sample, we check the incoming -// // tracestate to see whether it was already sampled enough. -// if len(ts.OTelValue().TValue()) != 0 { -// // If the incoming tracestate was already sampled at -// // least as much as our threshold implies, then its -// // (rejection) threshold is higher. If so, then no -// // further sampling is called for. -// if ThresholdGreater(ts.OTelValue().TValueThreshold(), fixedThreshold) { -// return true -// } -// } -// var rnd Randomness -// // If the R-value is present, use it. If not, rely on TraceID -// // randomness. Note that OTLP v1.1.0 introduces a new Span flag -// // to convey trace randomness correctly, and if the context has -// // neither the randomness bit set or the R-value set, we need a -// // fallback, which can be to synthesize an R-value or to assume -// // the TraceID has sufficient randomness. This detail is left -// // out of scope. -// if rval, hasRval := ts.OTelValue().RValueRandomness(); hasRv { -// rnd = rval -// } else { -// rnd = TraceIDToRandomness(tid) -// } -// -// return fixedThreshold.ShouldSample(rnd) -// } +// func MakeDecision(tracestate string, tid TraceID) bool { +// // Parse the incoming tracestate +// ts, err := NewW3CTraceState(tracestate) +// if err != nil { +// // error case: Tracestate is ill-formed. +// } +// // For an absolute probability sample, we check the incoming +// // tracestate to see whether it was already sampled enough. +// if threshold, hasThreshold := ts.OTelValue().TValueThreshold(); hasThreshold { +// // If the incoming tracestate was already sampled at +// // least as much as our threshold implies, then its +// // (rejection) threshold is higher. If so, then no +// // further sampling is called for. +// if ThresholdGreater(threshold, fixedThreshold) { +// // Do not update. +// return true +// } +// // The error below is ignored because we've tested +// // the equivalent condition above. This lowers the sampling +// // probability expressed in the tracestate T-value. +// _ = ts.OTelValue().UpdateThresholdWithSampling(fixedThreshold) +// } +// var rnd Randomness +// // If the R-value is present, use it. If not, rely on TraceID +// // randomness. Note that OTLP v1.1.0 introduces a new Span flag +// // to convey trace randomness correctly, and if the context has +// // neither the randomness bit set or the R-value set, we need a +// // fallback, which can be to synthesize an R-value or to assume +// // the TraceID has sufficient randomness. This detail is left +// // out of scope. +// if rv, hasRand := ts.OTelValue().RValueRandomness(); hasRand { +// rnd = rv +// } else { +// rnd = TraceIDToRandomness(tid) +// } +// return fixedThreshold.ShouldSample(rnd) +// } // // [W3C tracecontext specification]: https://www.w3.org/TR/trace-context/#tracestate-header // [Tracestate handling]: https://opentelemetry.io/docs/specs/otel/trace/tracestate-handling/ + package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go index 22babccda627..fbd277347175 100644 --- a/pkg/sampling/oteltracestate.go +++ b/pkg/sampling/oteltracestate.go @@ -97,9 +97,8 @@ func NewOpenTelemetryTraceState(input string) (OpenTelemetryTraceState, error) { if otts.rnd, err = RValueToRandomness(value); err == nil { otts.rvalue = value } else { - // The zero-value for randomness implies always-sample; - // the threshold test is R < T, but T is not meaningful - // at zero, and this value implies zero adjusted count. + // RValueRandomness() will return false, the error + // accumulates and is returned below. otts.rvalue = "" otts.rnd = Randomness{} } @@ -107,6 +106,8 @@ func NewOpenTelemetryTraceState(input string) (OpenTelemetryTraceState, error) { if otts.threshold, err = TValueToThreshold(value); err == nil { otts.tvalue = value } else { + // TValueThreshold() will return false, the error + // accumulates and is returned below. otts.tvalue = "" otts.threshold = AlwaysSampleThreshold } @@ -148,29 +149,47 @@ func (otts *OpenTelemetryTraceState) TValueThreshold() (Threshold, bool) { } // UpdateTValueWithSampling modifies the TValue of this object, which -// changes its adjusted count. If the change of TValue leads to -// inconsistency (i.e., raising sampling probability), an error is -// returned. -func (otts *OpenTelemetryTraceState) UpdateTValueWithSampling(sampledThreshold Threshold, encodedTValue string) error { +// changes its adjusted count. It is not logical to modify a sampling +// probability in the direction of larger probability. This prevents +// accidental loss of adjusted count. +// +// If the change of TValue leads to inconsistency, an error is returned. +func (otts *OpenTelemetryTraceState) UpdateTValueWithSampling(sampledThreshold Threshold) error { + // Note: there was once a code path here that optimized for + // cases where a static threshold is used, in which case the + // call to TValue() causes an unnecessary allocation per data + // item (w/ a constant result). We have eliminated that + // parameter, due to the significant potential for mis-use. + // Therefore, this method always recomputes TValue() of the + // sampledThreshold (on success). A future method such as + // UpdateTValueWithSamplingFixedTValue() could extend this + // API to address this allocation, although it is probably + // not significant. if len(otts.TValue()) != 0 && ThresholdGreater(otts.threshold, sampledThreshold) { return ErrInconsistentSampling } + // Note NeverSampleThreshold is the (exclusive) upper boundary + // of valid thresholds, so the test above permits never- + // sampled updates, in which case the TValue() here is empty. otts.threshold = sampledThreshold - otts.tvalue = encodedTValue + otts.tvalue = sampledThreshold.TValue() return nil } -// AdjustedCount returns the adjusted count implied by this TValue. -// This term is defined here: -// https://opentelemetry.io/docs/specs/otel/trace/tracestate-probability-sampling/ +// AdjustedCount returns the adjusted count for this item. If the +// TValue string is empty, this returns 0, otherwise returns +// Threshold.AdjustedCount(). func (otts *OpenTelemetryTraceState) AdjustedCount() float64 { - if len(otts.TValue()) == 0 { + if len(otts.tvalue) == 0 { + // Note: this case covers the zero state, where + // len(tvalue) == 0 and threshold == AlwaysSampleThreshold. + // We return 0 to indicate that no information is available. return 0 } - return 1.0 / otts.threshold.Probability() + return otts.threshold.AdjustedCount() } -// ClearTValue is used to unset TValue, in cases where it is +// ClearTValue is used to unset TValue, for use in cases where it is // inconsistent on arrival. func (otts *OpenTelemetryTraceState) ClearTValue() { otts.tvalue = "" diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go index b14c05690b53..4330c01466ab 100644 --- a/pkg/sampling/oteltracestate_test.go +++ b/pkg/sampling/oteltracestate_test.go @@ -105,7 +105,7 @@ func TestOpenTelemetryTraceStateTValueUpdate(t *testing.T) { require.NotEqual(t, "", otts.RValue()) th, _ := TValueToThreshold("3") - require.NoError(t, otts.UpdateTValueWithSampling(th, "3")) + require.NoError(t, otts.UpdateTValueWithSampling(th)) require.Equal(t, "3", otts.TValue()) tv, hasTv := otts.TValueThreshold() @@ -126,7 +126,7 @@ func TestOpenTelemetryTraceStateRTUpdate(t *testing.T) { require.True(t, otts.HasAnyValue()) th, _ := TValueToThreshold("3") - require.NoError(t, otts.UpdateTValueWithSampling(th, "3")) + require.NoError(t, otts.UpdateTValueWithSampling(th)) otts.SetRValue(must(RValueToRandomness("00000000000003"))) const updated = "rv:00000000000003;th:3;a:b" @@ -329,7 +329,7 @@ func TestUpdateTValueWithSampling(t *testing.T) { newTh, err := ProbabilityToThreshold(test.prob) require.NoError(t, err) - upErr := otts.UpdateTValueWithSampling(newTh, newTh.TValue()) + upErr := otts.UpdateTValueWithSampling(newTh) require.Equal(t, test.updateErr, upErr) diff --git a/pkg/sampling/probability.go b/pkg/sampling/probability.go index 4b3498b8596b..1aeebdd86021 100644 --- a/pkg/sampling/probability.go +++ b/pkg/sampling/probability.go @@ -11,12 +11,9 @@ import ( // ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. var ErrProbabilityRange = errors.New("sampling probability out of the range [1/MaxAdjustedCount, 1]") -// ErrPrecisionUnderflow is returned when a precision is too great for the range. -var ErrPrecisionUnderflow = errors.New("sampling precision is too great for the range") - // MinSamplingProbability is the smallest representable probability // and is the inverse of MaxAdjustedCount. -const MinSamplingProbability = 1.0 / MaxAdjustedCount +const MinSamplingProbability = 1.0 / float64(MaxAdjustedCount) // probabilityInRange tests MinSamplingProb <= prob <= 1. func probabilityInRange(prob float64) bool { @@ -26,67 +23,60 @@ func probabilityInRange(prob float64) bool { // ProbabilityToThreshold converts a probability to a Threshold. It // returns an error when the probability is out-of-range. func ProbabilityToThreshold(prob float64) (Threshold, error) { - // Probability cases - if !probabilityInRange(prob) { - return AlwaysSampleThreshold, ErrProbabilityRange - } - - scaled := uint64(math.Round(prob * MaxAdjustedCount)) - - return Threshold{ - unsigned: MaxAdjustedCount - scaled, - }, nil + return ProbabilityToThresholdWithPrecision(prob, NumHexDigits) } // ProbabilityToThresholdWithPrecision is like ProbabilityToThreshold -// with support for reduced precision. The `prec` argument determines +// with support for reduced precision. The `precision` argument determines // how many significant hex digits will be used to encode the exact // probability. -func ProbabilityToThresholdWithPrecision(prob float64, prec uint8) (Threshold, error) { +func ProbabilityToThresholdWithPrecision(fraction float64, precision int) (Threshold, error) { // Assume full precision at 0. - if prec == 0 { - return ProbabilityToThreshold(prob) + if precision == 0 { + precision = NumHexDigits } - if !probabilityInRange(prob) { + if !probabilityInRange(fraction) { return AlwaysSampleThreshold, ErrProbabilityRange } - // Special case for prob == 1. The logic for revising precision - // that follows requires 0 < 1 - prob < 1. - if prob == 1 { + // Special case for prob == 1. + if fraction == 1 { return AlwaysSampleThreshold, nil } - // Adjust precision considering the significance of leading - // zeros. If we can multiply the rejection probability by 16 - // and still be less than 1, then there is a leading zero of - // obligatory precision. - for reject := 1 - prob; reject*16 < 1; { - reject *= 16 - prec++ - } - - // Check if leading zeros plus precision is above the maximum. - // This is called underflow because the requested precision - // leads to complete no significant figures. - if prec > NumHexDigits { - return AlwaysSampleThreshold, ErrPrecisionUnderflow + // Calculate the amount of precision needed to encode the + // threshold with reasonable precision. Here, we count the + // number of leading `0` or `f` characters and automatically + // add precision to preserve relative error near the extremes. + // + // Frexp() normalizes both the fraction and one-minus the + // fraction, because more digits of precision are needed if + // either value is near zero. Frexp returns an exponent <= 0. + // + // If `exp <= -4`, there will be a leading hex `0` or `f`. + // For every multiple of -4, another leading `0` or `f` + // appears, so this raises precision accordingly. + _, expF := math.Frexp(fraction) + _, expR := math.Frexp(1 - fraction) + precision = min(NumHexDigits, max(precision+expF/-hexBits, precision+expR/-hexBits)) + + // Compute the threshold + scaled := uint64(math.Round(fraction * float64(MaxAdjustedCount))) + threshold := MaxAdjustedCount - scaled + + // Round to the specified precision, if less than the maximum. + if shift := hexBits * (NumHexDigits - precision); shift != 0 { + half := uint64(1) << (shift - 1) + threshold += half + threshold >>= shift + threshold <<= shift } - scaled := uint64(math.Round(prob * MaxAdjustedCount)) - rscaled := MaxAdjustedCount - scaled - shift := 4 * (14 - prec) - half := uint64(1) << (shift - 1) - - rscaled += half - rscaled >>= shift - rscaled <<= shift - return Threshold{ - unsigned: rscaled, + unsigned: threshold, }, nil } // Probability is the sampling ratio in the range [MinSamplingProb, 1]. func (t Threshold) Probability() float64 { - return float64(MaxAdjustedCount-t.unsigned) / MaxAdjustedCount + return float64(MaxAdjustedCount-t.unsigned) / float64(MaxAdjustedCount) } diff --git a/pkg/sampling/probability_test.go b/pkg/sampling/probability_test.go index 33b38d9deec7..1d843753af69 100644 --- a/pkg/sampling/probability_test.go +++ b/pkg/sampling/probability_test.go @@ -171,7 +171,10 @@ func ExampleProbabilityToThreshold_verysmall() { 0x8p-56, // Skip 8 out of 2**56 0x10p-56, // Skip 0x10 out of 2**56 } { - tval, _ := ProbabilityToThreshold(prob) + // Note that precision is automatically raised for + // such small probabilities, because leading 'f' and + // '0' digits are discounted. + tval, _ := ProbabilityToThresholdWithPrecision(prob, 3) fmt.Println(tval.TValue()) } @@ -279,7 +282,7 @@ func TestProbabilityToThresholdWithPrecision(t *testing.T) { for len(strip) > 0 && strip[0] == '0' { strip = strip[1:] } - rth, err := ProbabilityToThresholdWithPrecision(test.prob, uint8(len(strip))) + rth, err := ProbabilityToThresholdWithPrecision(test.prob, len(strip)) require.NoError(t, err) require.Equal(t, round, rth.TValue()) }) diff --git a/pkg/sampling/randomness.go b/pkg/sampling/randomness.go index 1cda0da8cc87..8e1cac2f0fcb 100644 --- a/pkg/sampling/randomness.go +++ b/pkg/sampling/randomness.go @@ -30,6 +30,9 @@ var ErrRValueSize = errors.New("r-value must have 14 hex digits") // the TraceID, as specified in https://www.w3.org/TR/trace-context-2/#randomness-of-trace-id const leastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 +// AllProbabilitiesRandomness is sampled at all probabilities. +var AllProbabilitiesRandomness = Randomness{unsigned: numRandomnessValues - 1} + // Randomness may be derived from R-value or TraceID. // // Randomness contains 56 bits of randomness, derived in one of two ways, see: @@ -85,5 +88,29 @@ func (rnd Randomness) RValue() string { // numRandomnessValues is 2^56: 100000000000000 // randomness+numRandomnessValues: 100aabbccddeeff // strip the leading "1": 00aabbccddeeff + // + // If the value is out-of-range, the empty string will be + // returned. + if rnd.unsigned >= numRandomnessValues { + return "" + } return strconv.FormatUint(numRandomnessValues+rnd.unsigned, hexBase)[1:] } + +// Unsigned returns the unsigned representation of the random value. +// Items of data SHOULD be sampled when: +// +// Threshold.Unsigned() <= // Randomness.Unsigned(). +func (rnd Randomness) Unsigned() uint64 { + return rnd.unsigned +} + +// UnsignedToRandomness constructs a randomness using 56 random bits +// of unsigned number. If the input is out of range, an invalid value +// will be returned with an error. +func UnsignedToRandomness(x uint64) (Randomness, error) { + if x >= MaxAdjustedCount { + return AllProbabilitiesRandomness, ErrRValueSize + } + return Randomness{unsigned: x}, nil +} diff --git a/pkg/sampling/randomness_test.go b/pkg/sampling/randomness_test.go index 542629ca12ba..9d4a164b6509 100644 --- a/pkg/sampling/randomness_test.go +++ b/pkg/sampling/randomness_test.go @@ -5,10 +5,23 @@ package sampling // import "github.com/open-telemetry/opentelemetry-collector-co import ( "fmt" + "testing" + "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" ) +func TestExplicitRandomness(t *testing.T) { + const val uint64 = 0x80000000000000 + rv, err := UnsignedToRandomness(val) + require.NoError(t, err) + require.Equal(t, val, rv.Unsigned()) + + rv, err = UnsignedToRandomness(MaxAdjustedCount) + require.Equal(t, err, ErrRValueSize) + require.Equal(t, AllProbabilitiesRandomness.Unsigned(), rv.Unsigned()) +} + func ExampleTraceIDToRandomness() { // TraceID represented in hex as "abababababababababd29d6a7215ced0" var exampleTid = pcommon.TraceID{ diff --git a/pkg/sampling/threshold.go b/pkg/sampling/threshold.go index af39be34cddf..044fef736d5b 100644 --- a/pkg/sampling/threshold.go +++ b/pkg/sampling/threshold.go @@ -11,7 +11,7 @@ import ( const ( // MaxAdjustedCount is 2^56 i.e. 0x100000000000000 i.e., 1<<56. - MaxAdjustedCount = 1 << 56 + MaxAdjustedCount uint64 = 1 << 56 // NumHexDigits is the number of hex digits equalling 56 bits. // This is the limit of sampling precision. @@ -51,6 +51,10 @@ var ( // AlwaysSampleThreshold represents 100% sampling. AlwaysSampleThreshold = Threshold{unsigned: 0} + + // NeverSampledThreshold is a threshold value that will always not sample. + // The TValue() corresponding with this threshold is an empty string. + NeverSampleThreshold = Threshold{unsigned: MaxAdjustedCount} ) // TValueToThreshold returns a Threshold. Because TValue strings @@ -79,14 +83,27 @@ func TValueToThreshold(s string) (Threshold, error) { }, nil } +// UnsignedToThreshold constructs a threshold expressed in terms +// defined by number of rejections out of MaxAdjustedCount, which +// equals the number of randomness values. +func UnsignedToThreshold(unsigned uint64) (Threshold, error) { + if unsigned >= MaxAdjustedCount { + return NeverSampleThreshold, ErrTValueSize + } + return Threshold{unsigned: unsigned}, nil +} + // TValue encodes a threshold, which is a variable-length hex string // up to 14 characters. The empty string is returned for 100% // sampling. func (th Threshold) TValue() string { // Always-sample is a special case because TrimRight() below // will trim it to the empty string, which represents no t-value. - if th == AlwaysSampleThreshold { + switch th { + case AlwaysSampleThreshold: return "0" + case NeverSampleThreshold: + return "" } // For thresholds other than the extremes, format a full-width // (14 digit) unsigned value with leading zeros, then, remove @@ -98,9 +115,32 @@ func (th Threshold) TValue() string { } // ShouldSample returns true when the span passes this sampler's -// consistent sampling decision. +// consistent sampling decision. The sampling decision can be +// expressed as a T <= R. func (th Threshold) ShouldSample(rnd Randomness) bool { - return rnd.unsigned >= th.unsigned + return th.unsigned <= rnd.unsigned +} + +// Unsigned expresses the number of Randomness values (out of +// MaxAdjustedCount) that are rejected or not sampled. 0 means 100% +// sampling. +func (th Threshold) Unsigned() uint64 { + return th.unsigned +} + +// AdjustedCount returns the adjusted count for this item, which is +// the representativity of the item due to sampling, equal to the +// inverse of sampling probability. If the threshold equals +// NeverSampleThreshold, the item should not have been sampled, in +// which case the Adjusted count is zero. +// +// This term is defined here: +// https://opentelemetry.io/docs/specs/otel/trace/tracestate-probability-sampling/ +func (th Threshold) AdjustedCount() float64 { + if th == NeverSampleThreshold { + return 0 + } + return 1.0 / th.Probability() } // ThresholdGreater allows direct comparison of Threshold values. diff --git a/pkg/sampling/threshold_test.go b/pkg/sampling/threshold_test.go index bada76904874..f05fb1033f23 100644 --- a/pkg/sampling/threshold_test.go +++ b/pkg/sampling/threshold_test.go @@ -6,10 +6,29 @@ package sampling // import "github.com/open-telemetry/opentelemetry-collector-co import ( "encoding/hex" "fmt" + "testing" + "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" ) +func TestExplicitThreshold(t *testing.T) { + const val uint64 = 0x80000000000000 + th, err := UnsignedToThreshold(val) + require.NoError(t, err) + require.Equal(t, val, th.Unsigned()) + + th, err = UnsignedToThreshold(MaxAdjustedCount) + require.Equal(t, err, ErrTValueSize) + require.Equal(t, MaxAdjustedCount, th.Unsigned()) + + // Note: the input is more out-of-range than above, test th == + // NeverSampleThreshold. + th, err = UnsignedToThreshold(MaxAdjustedCount + 10) + require.Equal(t, err, ErrTValueSize) + require.Equal(t, NeverSampleThreshold, th) +} + // ExampleTValueToThreshold demonstrates how to convert a T-value // string to a Threshold value. func ExampleTValueToThreshold() { diff --git a/pkg/sampling/w3ctracestate_test.go b/pkg/sampling/w3ctracestate_test.go index ec12774b9771..02eccf35c01b 100644 --- a/pkg/sampling/w3ctracestate_test.go +++ b/pkg/sampling/w3ctracestate_test.go @@ -55,7 +55,7 @@ func ExampleW3CTraceState_Serialize() { // value, since in some code paths the Threshold will have // just been parsed from a T-value, and in other code paths // the T-value will be precalculated. - err = w3c.OTelValue().UpdateTValueWithSampling(th, th.TValue()) + err = w3c.OTelValue().UpdateTValueWithSampling(th) if err != nil { panic(err) }