Skip to content

Commit

Permalink
Attach local root span id to profiling samples as a number not as a s…
Browse files Browse the repository at this point in the history
…tring

This is a newly-added feature to both the profiling backend, as well as
libdatadog.

Instead of sending the local root span ids as strings (that take up
space in the string table, etc), we can now send them as numbers.

We already did a similar migration for span ids in #2476, but at
the time we did not change the local root span ids because the
libdatadog `ddog_prof_Profile_set_endpoint` API could not properly
handle the numeric ids (this has been changed in
DataDog/libdatadog#80 ).

As far as testing goes, I could have chosen to abstract away this
change from our specs BUT I chose to make it explicit and changed the
specs to match. I like having visibility in the specs of the values
being encoded and passed around as numbers and not strings.
  • Loading branch information
ivoanjo committed Jan 6, 2023
1 parent e030ed9 commit 7363b3c
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,9 @@ struct per_thread_context {

// Used to correlate profiles with traces
struct trace_identifiers {
#define MAXIMUM_LENGTH_64_BIT_IDENTIFIER 21 // Why 21? 2^64 => 20 digits + 1 for \0

bool valid;
ddog_CharSlice local_root_span_id;
uint64_t local_root_span_id;
uint64_t span_id;
char local_root_span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER];
VALUE trace_endpoint;
};

Expand Down Expand Up @@ -576,7 +573,7 @@ static void trigger_sample_for_thread(
trace_identifiers_for(state, thread, &trace_identifiers_result);

if (trace_identifiers_result.valid) {
labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .str = trace_identifiers_result.local_root_span_id};
labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .num = trace_identifiers_result.local_root_span_id};
labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("span id"), .num = trace_identifiers_result.span_id};

if (trace_identifiers_result.trace_endpoint != Qnil) {
Expand Down Expand Up @@ -851,13 +848,7 @@ static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *stat
VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */);
if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return;

unsigned long long local_root_span_id = NUM2ULL(numeric_local_root_span_id);
snprintf(trace_identifiers_result->local_root_span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", local_root_span_id);

trace_identifiers_result->local_root_span_id = (ddog_CharSlice) {
.ptr = trace_identifiers_result->local_root_span_id_buffer,
.len = strlen(trace_identifiers_result->local_root_span_id_buffer)
};
trace_identifiers_result->local_root_span_id = NUM2ULL(numeric_local_root_span_id);
trace_identifiers_result->span_id = NUM2ULL(numeric_span_id);

trace_identifiers_result->valid = true;
Expand Down
17 changes: 14 additions & 3 deletions ext/ddtrace_profiling_native_extension/collectors_stack.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ static VALUE _native_sample(
VALUE recorder_instance,
VALUE metric_values_hash,
VALUE labels_array,
VALUE numeric_labels_array,
VALUE max_frames,
VALUE in_gc
);
Expand Down Expand Up @@ -60,7 +61,7 @@ void collectors_stack_init(VALUE profiling_module) {
// Hosts methods used for testing the native code using RSpec
VALUE testing_module = rb_define_module_under(collectors_stack_class, "Testing");

rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 6);
rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 7);

missing_string = rb_str_new2("");
rb_global_variable(&missing_string);
Expand All @@ -74,11 +75,13 @@ static VALUE _native_sample(
VALUE recorder_instance,
VALUE metric_values_hash,
VALUE labels_array,
VALUE numeric_labels_array,
VALUE max_frames,
VALUE in_gc
) {
ENFORCE_TYPE(metric_values_hash, T_HASH);
ENFORCE_TYPE(labels_array, T_ARRAY);
ENFORCE_TYPE(numeric_labels_array, T_ARRAY);

if (RHASH_SIZE(metric_values_hash) != ENABLED_VALUE_TYPES_COUNT) {
rb_raise(
Expand All @@ -95,17 +98,25 @@ static VALUE _native_sample(
metric_values[i] = NUM2LONG(metric_value);
}

long labels_count = RARRAY_LEN(labels_array);
long labels_count = RARRAY_LEN(labels_array) + RARRAY_LEN(numeric_labels_array);
ddog_prof_Label labels[labels_count];

for (int i = 0; i < labels_count; i++) {
for (int i = 0; i < RARRAY_LEN(labels_array); i++) {
VALUE key_str_pair = rb_ary_entry(labels_array, i);

labels[i] = (ddog_prof_Label) {
.key = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 0)),
.str = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 1))
};
}
for (int i = 0; i < RARRAY_LEN(numeric_labels_array); i++) {
VALUE key_str_pair = rb_ary_entry(numeric_labels_array, i);

labels[i + RARRAY_LEN(labels_array)] = (ddog_prof_Label) {
.key = char_slice_from_ruby_string(rb_ary_entry(key_str_pair, 0)),
.num = NUM2ULL(rb_ary_entry(key_str_pair, 1))
};
}

int max_frames_requested = NUM2INT(max_frames);
if (max_frames_requested < 0) rb_raise(rb_eArgError, "Invalid max_frames: value must not be negative");
Expand Down
5 changes: 3 additions & 2 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Sample sample) {
sampler_unlock_active_profile(active_slot);
}

void record_endpoint(VALUE recorder_instance, ddog_CharSlice local_root_span_id, ddog_CharSlice endpoint) {
void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) {
struct stack_recorder_state *state;
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);

Expand Down Expand Up @@ -482,6 +482,7 @@ static void serializer_set_start_timestamp_for_next_profile(struct stack_recorde
}

static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint) {
record_endpoint(recorder_instance, char_slice_from_ruby_string(local_root_span_id), char_slice_from_ruby_string(endpoint));
ENFORCE_TYPE(local_root_span_id, T_FIXNUM);
record_endpoint(recorder_instance, NUM2ULL(local_root_span_id), char_slice_from_ruby_string(endpoint));
return Qtrue;
}
2 changes: 1 addition & 1 deletion ext/ddtrace_profiling_native_extension/stack_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ static const ddog_prof_ValueType enabled_value_types[] = {
#define ENABLED_VALUE_TYPES_COUNT (sizeof(enabled_value_types) / sizeof(ddog_prof_ValueType))

void record_sample(VALUE recorder_instance, ddog_prof_Sample sample);
void record_endpoint(VALUE recorder_instance, ddog_CharSlice local_root_span_id, ddog_CharSlice endpoint);
void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
VALUE enforce_recorder_instance(VALUE object);
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def stats
sample

expect(t1_sample.fetch(:labels)).to include(
:'local root span id' => @t1_local_root_span_id.to_s,
:'local root span id' => @t1_local_root_span_id.to_i,
:'span id' => @t1_span_id.to_i,
)
end
Expand Down
3 changes: 2 additions & 1 deletion spec/datadog/profiling/collectors/stack_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
let(:gathered_stack) { stacks.fetch(:gathered) }

def sample(thread, recorder_instance, metric_values_hash, labels_array, max_frames: 400, in_gc: false)
described_class::Testing._native_sample(thread, recorder_instance, metric_values_hash, labels_array, max_frames, in_gc)
numeric_labels_array = []
described_class::Testing._native_sample(thread, recorder_instance, metric_values_hash, labels_array, numeric_labels_array, max_frames, in_gc)
end

# This spec explicitly tests the main thread because an unpatched rb_profile_frames returns one more frame in the
Expand Down
24 changes: 13 additions & 11 deletions spec/datadog/profiling/stack_recorder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
RSpec.describe Datadog::Profiling::StackRecorder do
before { skip_if_profiling_not_supported(self) }

let(:numeric_labels) { [] }

subject(:stack_recorder) { described_class.new }

# NOTE: A lot of libdatadog integration behaviors are tested in the Collectors::Stack specs, since we need actual
Expand Down Expand Up @@ -144,7 +146,7 @@ def sample_types_from(decoded_profile)

before do
Datadog::Profiling::Collectors::Stack::Testing
._native_sample(Thread.current, stack_recorder, metric_values, labels, 400, false)
._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false)
expect(samples.size).to be 1
end

Expand Down Expand Up @@ -175,23 +177,23 @@ def sample_types_from(decoded_profile)
end

describe 'trace endpoint behavior' do
let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 1, 'wall-time' => 789 } }
let(:metric_values) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789 } }
let(:samples) { samples_from_pprof(encoded_pprof) }

it 'includes the endpoint for all matching samples taken before and after recording the endpoint' do
local_root_span_id_with_endpoint = { 'local root span id' => '123' }
local_root_span_id_without_endpoint = { 'local root span id' => '456' }
local_root_span_id_with_endpoint = { 'local root span id' => 123 }
local_root_span_id_without_endpoint = { 'local root span id' => 456 }

sample = proc do |labels = {}|
sample = proc do |numeric_labels = {}|
Datadog::Profiling::Collectors::Stack::Testing
._native_sample(Thread.current, stack_recorder, metric_values, labels.to_a, 400, false)
._native_sample(Thread.current, stack_recorder, metric_values, [], numeric_labels.to_a, 400, false)
end

sample.call
sample.call(local_root_span_id_without_endpoint)
sample.call(local_root_span_id_with_endpoint)

described_class::Testing._native_record_endpoint(stack_recorder, '123', 'recorded-endpoint')
described_class::Testing._native_record_endpoint(stack_recorder, 123, 'recorded-endpoint')

sample.call
sample.call(local_root_span_id_without_endpoint)
Expand All @@ -201,12 +203,12 @@ def sample_types_from(decoded_profile)

# Other samples have not been changed
expect(samples.select { |it| it[:labels].empty? }).to have(2).items
expect(samples.select { |it| it[:labels] == { :'local root span id' => '456' } }).to have(2).items
expect(samples.select { |it| it[:labels] == { :'local root span id' => 456 } }).to have(2).items

# Matching samples taken before and after recording the endpoint have been changed
expect(
samples.select do |it|
it[:labels] == { :'local root span id' => '123', :'trace endpoint' => 'recorded-endpoint' }
it[:labels] == { :'local root span id' => 123, :'trace endpoint' => 'recorded-endpoint' }
end
).to have(2).items
end
Expand Down Expand Up @@ -314,15 +316,15 @@ def sample_types_from(decoded_profile)
it 'makes the next calls to serialize return no data' do
# Add some data
Datadog::Profiling::Collectors::Stack::Testing
._native_sample(Thread.current, stack_recorder, metric_values, labels, 400, false)
._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false)

# Sanity check: validate that data is there, to avoid the test passing because of other issues
sanity_check_samples = samples_from_pprof(stack_recorder.serialize.last)
expect(sanity_check_samples.size).to be 1

# Add some data, again
Datadog::Profiling::Collectors::Stack::Testing
._native_sample(Thread.current, stack_recorder, metric_values, labels, 400, false)
._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false)

reset_after_fork

Expand Down

0 comments on commit 7363b3c

Please sign in to comment.