diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c index 15b1a419f29..c31bf15279a 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c @@ -66,10 +66,12 @@ #define IS_NOT_WALL_TIME false #define MISSING_TRACER_CONTEXT_KEY 0 -static ID at_active_trace_id; // id of :@active_trace in Ruby -static ID at_root_span_id; // id of :@root_span in Ruby static ID at_active_span_id; // id of :@active_span in Ruby +static ID at_active_trace_id; // id of :@active_trace in Ruby static ID at_id_id; // id of :@id in Ruby +static ID at_resource_id; // id of :@resource in Ruby +static ID at_root_span_id; // id of :@root_span in Ruby +static ID at_type_id; // id of :@type in Ruby // Contains state for a single CpuAndWallTime instance struct cpu_and_wall_time_collector_state { @@ -129,6 +131,7 @@ struct trace_identifiers { ddog_CharSlice span_id; char local_root_span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER]; char span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER]; + VALUE trace_endpoint; }; static void cpu_and_wall_time_collector_typed_data_mark(void *state_ptr); @@ -165,6 +168,7 @@ static long wall_time_now_ns(bool raise_on_failure); static long thread_id_for(VALUE thread); static VALUE _native_stats(VALUE self, VALUE collector_instance); static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result); +static bool is_type_web(VALUE root_span_type); static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance); void collectors_cpu_and_wall_time_init(VALUE profiling_module) { @@ -194,10 +198,12 @@ void collectors_cpu_and_wall_time_init(VALUE profiling_module) { rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1); rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1); - at_active_trace_id = rb_intern_const("@active_trace"); - at_root_span_id = rb_intern_const("@root_span"); at_active_span_id = rb_intern_const("@active_span"); + at_active_trace_id = rb_intern_const("@active_trace"); at_id_id = rb_intern_const("@id"); + at_resource_id = rb_intern_const("@resource"); + at_root_span_id = rb_intern_const("@root_span"); + at_type_id = rb_intern_const("@type"); } // This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_collector_state @@ -572,12 +578,29 @@ static void trigger_sample_for_thread( }; } - struct trace_identifiers trace_identifiers_result = {.valid = false}; + struct trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil}; trace_identifiers_for(state, thread, &trace_identifiers_result); if (trace_identifiers_result.valid) { labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .str = trace_identifiers_result.local_root_span_id}; labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("span id"), .str = trace_identifiers_result.span_id}; + + if (trace_identifiers_result.trace_endpoint != Qnil) { + // The endpoint gets recorded in a different way because it is mutable in the tracer and can change during a + // trace. + // + // Instead of each sample for the same local_root_span_id getting a potentially-different endpoint, + // `record_endpoint` (via libdatadog) keeps a list of local_root_span_id values and their most-recently-seen + // endpoint values, and at serialization time the most-recently-seen endpoint is applied to all relevant samples. + // + // This is why the endpoint is not directly added in this function to the labels array, although it will later + // show up in the array in the output pprof. + record_endpoint( + state->recorder_instance, + trace_identifiers_result.local_root_span_id, + char_slice_from_ruby_string(trace_identifiers_result.trace_endpoint) + ); + } } // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I @@ -864,6 +887,24 @@ static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *stat }; trace_identifiers_result->valid = true; + + VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */); + if (root_span_type == Qnil || !is_type_web(root_span_type)) return; + + VALUE trace_resource = rb_ivar_get(active_trace, at_resource_id /* @resource */); + if (RB_TYPE_P(trace_resource, T_STRING)) { + trace_identifiers_result->trace_endpoint = trace_resource; + } else if (trace_resource == Qnil) { + // Fall back to resource from span, if any + trace_identifiers_result->trace_endpoint = rb_ivar_get(root_span, at_resource_id /* @resource */); + } +} + +static bool is_type_web(VALUE root_span_type) { + ENFORCE_TYPE(root_span_type, T_STRING); + + return RSTRING_LEN(root_span_type) == strlen("web") && + (memcmp("web", StringValuePtr(root_span_type), strlen("web")) == 0); } // After the Ruby VM forks, this method gets called in the child process to clean up any leftover state from the parent. diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time.rb index be2e051862e..cf9267a2f54 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time.rb @@ -30,10 +30,12 @@ def reset_after_fork private def safely_extract_context_key_from(tracer) - tracer && - tracer.respond_to?(:provider) && - # NOTE: instance_variable_get always works, even on nil -- it just returns nil if the variable doesn't exist - tracer.provider.instance_variable_get(:@context).instance_variable_get(:@key) + provider = tracer && tracer.respond_to?(:provider) && tracer.provider + + return unless provider + + context = provider.instance_variable_get(:@context) + context && context.instance_variable_get(:@key) end end end diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_spec.rb index 4d02529f4f4..5bebd54c68e 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_spec.rb @@ -327,11 +327,15 @@ def stats end context 'when thread has a tracer context, and a trace is in progress' do + let(:root_span_type) { 'not-web' } + let(:t1) do Thread.new(ready_queue) do |ready_queue| - Datadog::Tracing.trace('profiler.test') do - Datadog::Tracing.trace('profiler.test.inner') do |span, trace| - @t1_span_id = span.id + Datadog::Tracing.trace('profiler.test', type: root_span_type) do |_span, trace| + @t1_trace = trace + + Datadog::Tracing.trace('profiler.test.inner') do |inner_span| + @t1_span_id = inner_span.id @t1_local_root_span_id = trace.send(:root_span).id ready_queue << true sleep @@ -361,6 +365,106 @@ def stats :'span id' => @t1_span_id.to_s, ) end + + it 'does not include the "trace endpoint" label' do + sample + + expect(t1_sample.fetch(:labels).keys).to_not include(:'trace endpoint') + end + + context 'when local root span type is web' do + let(:root_span_type) { 'web' } + + it 'includes the "trace endpoint" label in the samples' do + sample + + expect(t1_sample.fetch(:labels)).to include(:'trace endpoint' => 'profiler.test') + end + + describe 'trace vs root span resource mutation' do + let(:t1) do + Thread.new(ready_queue) do |ready_queue| + Datadog::Tracing.trace('profiler.test', type: root_span_type) do |span, trace| + trace.resource = trace_resource + span.resource = root_span_resource + + Datadog::Tracing.trace('profiler.test.inner') do |inner_span| + @t1_span_id = inner_span.id + @t1_local_root_span_id = trace.send(:root_span).id + ready_queue << true + sleep + end + end + end + end + + context 'when the trace resource is nil but the root span resource is not nil' do + let(:trace_resource) { nil } + let(:root_span_resource) { 'root_span_resource' } + + it 'includes the "trace endpoint" label in the samples with the root span resource' do + sample + + expect(t1_sample.fetch(:labels)).to include(:'trace endpoint' => 'root_span_resource') + end + end + + context 'when both the trace resource and the root span resource are specified' do + let(:trace_resource) { 'trace_resource' } + let(:root_span_resource) { 'root_span_resource' } + + it 'includes the "trace endpoint" label in the samples with the trace resource' do + sample + + expect(t1_sample.fetch(:labels)).to include(:'trace endpoint' => 'trace_resource') + end + end + + context 'when both the trace resource and the root span resource are nil' do + let(:trace_resource) { nil } + let(:root_span_resource) { nil } + + it 'does not include the "trace endpoint" label' do + sample + + expect(t1_sample.fetch(:labels).keys).to_not include(:'trace endpoint') + end + end + end + + context 'when resource is changed after a sample was taken' do + before do + sample + @t1_trace.resource = 'changed_after_first_sample' + end + + it 'changes the "trace endpoint" label in all samples' do + sample + + t1_samples = samples_for_thread(samples, t1) + + expect(t1_samples).to have(1).item + expect(t1_samples.first.fetch(:labels)).to include(:'trace endpoint' => 'changed_after_first_sample') + expect(t1_samples.first.fetch(:values)).to include(:'cpu-samples' => 2) + end + + context 'when the resource is changed multiple times' do + it 'changes the "trace endpoint" label in all samples' do + sample + + @t1_trace.resource = 'changed_after_second_sample' + + sample + + t1_samples = samples_for_thread(samples, t1) + + expect(t1_samples).to have(1).item + expect(t1_samples.first.fetch(:labels)).to include(:'trace endpoint' => 'changed_after_second_sample') + expect(t1_samples.first.fetch(:values)).to include(:'cpu-samples' => 3) + end + end + end + end end end end diff --git a/spec/datadog/profiling/spec_helper.rb b/spec/datadog/profiling/spec_helper.rb index 3f27cba6fc1..71dd0856111 100644 --- a/spec/datadog/profiling/spec_helper.rb +++ b/spec/datadog/profiling/spec_helper.rb @@ -65,7 +65,7 @@ def samples_from_pprof(pprof_data) labels: sample.label.map do |it| [ string_table[it.key].to_sym, - it.str != 0 ? string_table[it.str] : raise('Unexpected: label encoded as number instead of string'), + it.num == 0 ? string_table[it.str] : raise('Unexpected: label encoded as number instead of string'), ] end.to_h, }