Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: regression in sampling delegation handling #133

Merged
merged 1 commit into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 16 additions & 24 deletions src/datadog/extraction_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ namespace datadog {
namespace tracing {
namespace {

constexpr StringView sampling_delegation_request_header =
"x-datadog-delegate-trace-sampling";

// Decode the specified `trace_tags` and integrate them into the specified
// `result`. If an error occurs, add a `tags::internal::propagation_error` tag
// to the specified `span_tags` and log a diagnostic using the specified
Expand Down Expand Up @@ -129,29 +126,24 @@ Expected<ExtractedData> extract_datadog(
}
result.parent_id = *parent_id;

if (auto found = headers.lookup("x-datadog-sampling-priority")) {
auto sampling_priority = parse_int(*found, 10);
if (auto* error = sampling_priority.if_error()) {
std::string prefix;
prefix +=
"Could not extract Datadog-style sampling priority from "
"x-datadog-sampling-priority: ";
append(prefix, *found);
prefix += ' ';
return error->with_prefix(prefix);
}

result.sampling_priority = *sampling_priority;
}

if (auto sampling_delegation_header =
headers.lookup(sampling_delegation_request_header)) {
headers.lookup("x-datadog-delegate-trace-sampling")) {
result.delegate_sampling_decision = true;
// If the trace sampling decision is being delegated to us, then we don't
// interpret the sampling priority (if any) included in the request.
} else {
result.delegate_sampling_decision = false;

const StringView sampling_priority_header = "x-datadog-sampling-priority";
if (auto found = headers.lookup(sampling_priority_header)) {
auto sampling_priority = parse_int(*found, 10);
if (auto* error = sampling_priority.if_error()) {
std::string prefix;
prefix += "Could not extract Datadog-style sampling priority from ";
append(prefix, sampling_priority_header);
prefix += ": ";
append(prefix, *found);
prefix += ' ';
return error->with_prefix(prefix);
}

result.sampling_priority = *sampling_priority;
}
}

auto origin = headers.lookup("x-datadog-origin");
Expand Down
81 changes: 45 additions & 36 deletions src/datadog/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,7 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
extracted_contexts.back().headers_examined = audited_reader.entries_found;
}

auto [trace_id, parent_id, origin, trace_tags, delegate_sampling_decision,
sampling_priority, datadog_w3c_parent_id, additional_w3c_tracestate,
additional_datadog_w3c_tracestate, style, headers_examined] =
merge(extracted_contexts);
auto merged_context = merge(extracted_contexts);

// Some information might be missing.
// Here are the combinations considered:
Expand All @@ -196,56 +193,60 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
// - trace ID and parent ID means we're extracting a child span
// - if trace ID is zero, then that's an error.

if (!trace_id && !parent_id) {
if (!merged_context.trace_id && !merged_context.parent_id) {
return Error{Error::NO_SPAN_TO_EXTRACT,
"There's neither a trace ID nor a parent span ID to extract."}
.with_prefix(extraction_error_prefix(style, headers_examined));
.with_prefix(extraction_error_prefix(merged_context.style,
merged_context.headers_examined));
}
if (!trace_id) {
if (!merged_context.trace_id) {
std::string message;
message +=
"There's no trace ID to extract, but there is a parent span ID: ";
message += std::to_string(*parent_id);
message += std::to_string(*merged_context.parent_id);
return Error{Error::MISSING_TRACE_ID, std::move(message)}.with_prefix(
extraction_error_prefix(style, headers_examined));
extraction_error_prefix(merged_context.style,
merged_context.headers_examined));
}
if (!parent_id && !origin) {
if (!merged_context.parent_id && !merged_context.origin) {
std::string message;
message +=
"There's no parent span ID to extract, but there is a trace ID: ";
message += "[hexadecimal = ";
message += trace_id->hex_padded();
if (trace_id->high == 0) {
message += merged_context.trace_id->hex_padded();
if (merged_context.trace_id->high == 0) {
message += ", decimal = ";
message += std::to_string(trace_id->low);
message += std::to_string(merged_context.trace_id->low);
}
message += ']';
return Error{Error::MISSING_PARENT_SPAN_ID, std::move(message)}.with_prefix(
extraction_error_prefix(style, headers_examined));
extraction_error_prefix(merged_context.style,
merged_context.headers_examined));
}

if (!parent_id) {
if (!merged_context.parent_id) {
// We have a trace ID, but not parent ID. We're meant to be the root, and
// whoever called us already created a trace ID for us (to correlate with
// whatever they're doing).
parent_id = 0;
merged_context.parent_id = 0;
}

assert(parent_id);
assert(trace_id);
assert(merged_context.parent_id);
assert(merged_context.trace_id);

if (*trace_id == 0) {
if (*merged_context.trace_id == 0) {
return Error{Error::ZERO_TRACE_ID,
"extracted zero value for trace ID, which is invalid"}
.with_prefix(extraction_error_prefix(style, headers_examined));
.with_prefix(extraction_error_prefix(merged_context.style,
merged_context.headers_examined));
}

// We're done extracting fields. Now create the span.
// This is similar to what we do in `create_span`.
span_data->apply_config(*config_manager_->span_defaults(), config, clock_);
span_data->span_id = generator_->span_id();
span_data->trace_id = *trace_id;
span_data->parent_id = *parent_id;
span_data->trace_id = *merged_context.trace_id;
span_data->parent_id = *merged_context.parent_id;

if (span_data->trace_id.high) {
// The trace ID has some bits set in the higher 64 bits. Set the
Expand All @@ -255,12 +256,14 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
// First, though, if the `trace_id_high` tag is already set and has a
// bogus value or a value inconsistent with the trace ID, tag an error.
const auto hex_high = hex_padded(span_data->trace_id.high);
const auto extant = std::find_if(
trace_tags.begin(), trace_tags.end(), [&](const auto& pair) {
return pair.first == tags::internal::trace_id_high;
});
if (extant == trace_tags.end()) {
trace_tags.emplace_back(tags::internal::trace_id_high, hex_high);
const auto extant =
std::find_if(merged_context.trace_tags.begin(),
merged_context.trace_tags.end(), [&](const auto& pair) {
return pair.first == tags::internal::trace_id_high;
});
if (extant == merged_context.trace_tags.end()) {
merged_context.trace_tags.emplace_back(tags::internal::trace_id_high,
hex_high);
} else {
// There is already a `trace_id_high` tag. `hex_high` is its proper
// value. Check if the extant value is malformed or different from
Expand All @@ -279,14 +282,18 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
}
}

if (datadog_w3c_parent_id) {
span_data->tags[tags::internal::w3c_parent_id] = *datadog_w3c_parent_id;
if (merged_context.datadog_w3c_parent_id) {
span_data->tags[tags::internal::w3c_parent_id] =
*merged_context.datadog_w3c_parent_id;
}

const bool delegate_sampling_decision =
sampling_delegation_enabled_ && merged_context.delegate_sampling_decision;

Optional<SamplingDecision> sampling_decision;
if (sampling_priority) {
if (!delegate_sampling_decision && merged_context.sampling_priority) {
SamplingDecision decision;
decision.priority = *sampling_priority;
decision.priority = *merged_context.sampling_priority;
// `decision.mechanism` is null. We might be able to infer it once we
// extract `trace_tags`, but we would have no use for it, so we won't.
decision.origin = SamplingDecision::Origin::EXTRACTED;
Expand All @@ -300,10 +307,12 @@ Expected<Span> Tracer::extract_span(const DictReader& reader,
logger_, collector_, tracer_telemetry_, config_manager_->trace_sampler(),
span_sampler_, config_manager_->span_defaults(), config_manager_,
runtime_id_, sampling_delegation_enabled_, delegate_sampling_decision,
injection_styles_, hostname_, std::move(origin), tags_header_max_size_,
std::move(trace_tags), std::move(sampling_decision),
std::move(additional_w3c_tracestate),
std::move(additional_datadog_w3c_tracestate), std::move(span_data));
injection_styles_, hostname_, std::move(merged_context.origin),
tags_header_max_size_, std::move(merged_context.trace_tags),
std::move(sampling_decision),
std::move(merged_context.additional_w3c_tracestate),
std::move(merged_context.additional_datadog_w3c_tracestate),
std::move(span_data));
Span span{span_data_ptr, segment,
[generator = generator_]() { return generator->span_id(); },
clock_};
Expand Down
101 changes: 64 additions & 37 deletions test/test_tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,16 +445,6 @@ TEST_CASE("span extraction") {
TraceID(123),
456,
2},
{__LINE__,
"datadog style with delegate header",
{PropagationStyle::DATADOG},
{{"x-datadog-trace-id", "123"},
{"x-datadog-parent-id", "456"},
{"x-datadog-delegate-trace-sampling", "delegate"},
{"x-datadog-sampling-priority", "3"}},
TraceID(123),
456,
nullopt},
{__LINE__,
"datadog style without sampling priority",
{PropagationStyle::DATADOG},
Expand Down Expand Up @@ -1204,7 +1194,12 @@ TEST_CASE("span extraction") {
MockDictReader reader{headers};
auto span = tracer.extract_span(reader);
REQUIRE(span);
REQUIRE(!span->trace_segment().sampling_decision());

if (*config.delegate_trace_sampling) {
REQUIRE(!span->trace_segment().sampling_decision());
} else {
REQUIRE(span->trace_segment().sampling_decision());
}

MockDictWriter writer;
span->inject(writer);
Expand Down Expand Up @@ -1409,6 +1404,49 @@ TEST_CASE(
test_case.expected_error_prefix + test_case.tid_tag_value);
}

TEST_CASE("sampling delegation extraction") {
const bool enable_sampling_delegation = GENERATE(true, false);

CAPTURE(enable_sampling_delegation);

const auto logger = std::make_shared<NullLogger>();
const auto collector = std::make_shared<NullCollector>();

TracerConfig config;
config.service = "test-sampling-delegation";
config.logger = logger;
config.collector = collector;
config.extraction_styles = {PropagationStyle::DATADOG};
config.trace_sampler.sample_rate = 1.;
config.delegate_trace_sampling = enable_sampling_delegation;

auto validated_config = finalize_config(config);
REQUIRE(validated_config);

Tracer tracer(*validated_config);

const std::unordered_map<std::string, std::string> headers{
{"x-datadog-trace-id", "17491188783264004180"},
{"x-datadog-parent-id", "3390700340160032468"},
{"x-datadog-sampling-priority", "-1"},
{"x-datadog-tags", "_dd.p.tid=66718e8c00000000"},
{"x-datadog-delegate-trace-sampling", "delegate"},
};

MockDictReader propagation_reader{headers};
const auto maybe_span = tracer.extract_span(propagation_reader);
REQUIRE(maybe_span);

auto sampling_decision = maybe_span->trace_segment().sampling_decision();
if (enable_sampling_delegation) {
CHECK(!sampling_decision.has_value());
} else {
REQUIRE(sampling_decision.has_value());
CHECK(sampling_decision->origin == SamplingDecision::Origin::EXTRACTED);
CHECK(sampling_decision->priority == int(SamplingPriority::USER_DROP));
}
}

TEST_CASE("_dd.is_sampling_decider") {
// This test involves three tracers: "service1", "service2", and "service3".
// Each calls the next, and each produces two spans: "local_root" and "child".
Expand Down Expand Up @@ -1451,6 +1489,7 @@ TEST_CASE("_dd.is_sampling_decider") {
config2.collector = collector;
config2.logger = logger;
config2.service = "service2";
config2.trace_sampler.sample_rate = 1; // keep all traces
config2.delegate_trace_sampling = true;

TracerConfig config3;
Expand Down Expand Up @@ -1580,7 +1619,9 @@ TEST_CASE("_dd.is_sampling_decider") {
}
REQUIRE(span.service != "service1");
if (span.service == "service2" && span.name == "local_root") {
REQUIRE(span.tags.count(tags::internal::sampling_decider) == 0);
const bool made_the_decision = service3_delegation_enabled ? 0 : 1;
REQUIRE(span.tags.count(tags::internal::sampling_decider) ==
made_the_decision);
REQUIRE(span.numeric_tags.count(tags::internal::sampling_priority) ==
1);
REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) ==
Expand All @@ -1595,8 +1636,9 @@ TEST_CASE("_dd.is_sampling_decider") {
}
REQUIRE(span.service != "service2");
if (span.service == "service3" && span.name == "local_root") {
REQUIRE(span.tags.count(tags::internal::sampling_decider) == 1);
REQUIRE(span.tags.at(tags::internal::sampling_decider) == "1");
const bool made_the_decision = service3_delegation_enabled ? 1 : 0;
REQUIRE(span.tags.count(tags::internal::sampling_decider) ==
made_the_decision);
REQUIRE(span.numeric_tags.count(tags::internal::sampling_priority) ==
1);
REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) ==
Expand Down Expand Up @@ -1649,27 +1691,13 @@ TEST_CASE("sampling delegation is not an override") {
//
// The idea is that service2 does not perform delegation when service1 already
// made a decision and did not request delegation.
auto service1_sampling_priority =
GENERATE(values<Optional<int>>({nullopt, -1, 0, 1, 2}));
auto service1_delegate = GENERATE(true, false);
auto service3_sample_rate = GENERATE(0.0, 1.0);

int expected_sampling_priority;
if (service1_sampling_priority.has_value() && !service1_delegate) {
// Service1 made a decision and didn't ask for delegation, so that's the
// decision.
expected_sampling_priority = *service1_sampling_priority;
} else {
// Service1 either didn't make a decision or did request delegation, so the
// decision will be delegated through service2 to service3, whose decision
// depends on its configured sample rate.
expected_sampling_priority = service3_sample_rate == 0.0 ? -1 : 2;
}
const int service3_sampling_priority = service3_sample_rate == 0.0 ? -1 : 2;

CAPTURE(service1_sampling_priority);
CAPTURE(service1_delegate);
CAPTURE(service3_sample_rate);
CAPTURE(expected_sampling_priority);

const auto collector = std::make_shared<MockCollector>();
const auto logger = std::make_shared<MockLogger>();
Expand Down Expand Up @@ -1697,6 +1725,7 @@ TEST_CASE("sampling delegation is not an override") {
config3.logger = logger;
config3.extraction_styles = config1.injection_styles = styles;
config3.service = "service3";
config3.delegate_trace_sampling = true;
config3.trace_sampler.sample_rate = service3_sample_rate;

auto valid_config = finalize_config(config1);
Expand Down Expand Up @@ -1725,18 +1754,16 @@ TEST_CASE("sampling delegation is not an override") {
span_config.name = "local_root";
Span span1 = tracer1.create_span(span_config);
span1.inject(propagation_writer);
if (service1_sampling_priority.has_value()) {
propagation_writer.items["x-datadog-sampling-priority"] =
std::to_string(*service1_sampling_priority);
} else {
propagation_writer.items.erase("x-datadog-sampling-priority");
}

{
auto span2 = tracer2.extract_span(propagation_reader, span_config);
REQUIRE(span2);
propagation_writer.items.clear();
span2->inject(propagation_writer);
const bool expected_delegate_header = service1_delegate ? 1 : 0;
CHECK(
propagation_writer.items.count("x-datadog-delegate-trace-sampling") ==
expected_delegate_header);

{
auto span3 = tracer3.extract_span(propagation_reader, span_config);
Expand Down Expand Up @@ -1767,11 +1794,11 @@ TEST_CASE("sampling delegation is not an override") {
// If `service1_delegate` is false, then service1's sampling decision was
// made by the service1's sampler, which will result in priority 2.
// Otherwise, it's the same priority expected for the other spans.
if (span.service == "service1" && !service1_delegate) {
if (!service1_delegate) {
REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) == 2);
} else {
REQUIRE(span.numeric_tags.at(tags::internal::sampling_priority) ==
expected_sampling_priority);
service3_sampling_priority);
}
}
}
Expand Down
Loading