From f3da4576c600504e6a55b7f602128be14d2e7cc5 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 4 Dec 2023 13:19:13 +0000 Subject: [PATCH 01/21] Refactor placeholder stack recording to be reusable --- .../collectors_stack.c | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index 7611f13e41e..adeb7f6157f 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -36,13 +36,14 @@ static VALUE _native_sample( VALUE in_gc ); static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer* buffer, char *frames_omitted_message, int frames_omitted_message_size); -static void record_placeholder_stack_in_native_code( +static void record_placeholder_stack( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, sample_labels labels, sampling_buffer *record_buffer, - int extra_frames_in_record_buffer + int extra_frames_in_record_buffer, + ddog_prof_Function placeholder_stack ); static void sample_thread_internal( VALUE thread, @@ -218,13 +219,14 @@ static void sample_thread_internal( ); if (captured_frames == PLACEHOLDER_STACK_IN_NATIVE_CODE) { - record_placeholder_stack_in_native_code( + record_placeholder_stack( buffer, recorder_instance, values, labels, record_buffer, - extra_frames_in_record_buffer + extra_frames_in_record_buffer, + (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("In native code")} ); return; } @@ -379,20 +381,16 @@ static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer* // // To give customers visibility into these threads, rather than reporting an empty stack, we replace the empty stack // with one containing a placeholder frame, so that these threads are properly represented in the UX. 
-static void record_placeholder_stack_in_native_code( +static void record_placeholder_stack( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, sample_labels labels, sampling_buffer *record_buffer, - int extra_frames_in_record_buffer + int extra_frames_in_record_buffer, + ddog_prof_Function placeholder_stack ) { - ddog_CharSlice function_name = DDOG_CHARSLICE_C(""); - ddog_CharSlice function_filename = DDOG_CHARSLICE_C("In native code"); - buffer->locations[0] = (ddog_prof_Location) { - .function = (ddog_prof_Function) {.name = function_name, .filename = function_filename}, - .line = 0 - }; + buffer->locations[0] = (ddog_prof_Location) {.function = placeholder_stack, .line = 0}; record_sample( recorder_instance, From 3bca25198ab753d6664d1da988618b5787d0b9ae Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 4 Dec 2023 18:12:15 +0000 Subject: [PATCH 02/21] Garbage Collection samples no longer collect stacks When this feature was added, we weren't yet working on allocation profiling, so having stacks would hint at where an allocation was that triggered the GC. This... is basically a very crappy allocation profiler, and as an anti-bonus it can also get quite expensive. As part of making this feature shape up for us to have it on by default, let's no longer collect stacks. As an alternative, you can get a similar context either from allocation profiling OR by looking at thread timeline for what the thread was doing around the time the GC happened. 
--- .../collectors_stack.c | 32 +++++------- .../profiling/collectors/stack_spec.rb | 49 ++----------------- 2 files changed, 16 insertions(+), 65 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index adeb7f6157f..1b70e069047 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -145,34 +145,26 @@ void sample_thread( sample_labels labels, sample_type type ) { + sampling_buffer *record_buffer = buffer; + int extra_frames_in_record_buffer = 0; + // Samples thread into recorder if (type == SAMPLE_REGULAR) { - sampling_buffer *record_buffer = buffer; - int extra_frames_in_record_buffer = 0; sample_thread_internal(thread, buffer, recorder_instance, values, labels, record_buffer, extra_frames_in_record_buffer); return; } // Samples thread into recorder, including as a top frame in the stack a frame named "Garbage Collection" if (type == SAMPLE_IN_GC) { - ddog_CharSlice function_name = DDOG_CHARSLICE_C(""); - ddog_CharSlice function_filename = DDOG_CHARSLICE_C("Garbage Collection"); - buffer->locations[0] = (ddog_prof_Location) { - .function = (ddog_prof_Function) {.name = function_name, .filename = function_filename}, - .line = 0 - }; - // To avoid changing sample_thread_internal, we just prepare a new buffer struct that uses the same underlying storage as the - // original buffer, but has capacity one less, so that we can keep the above Garbage Collection frame untouched. 
- sampling_buffer thread_in_gc_buffer = (struct sampling_buffer) { - .max_frames = buffer->max_frames - 1, - .stack_buffer = buffer->stack_buffer + 1, - .lines_buffer = buffer->lines_buffer + 1, - .is_ruby_frame = buffer->is_ruby_frame + 1, - .locations = buffer->locations + 1, - }; - sampling_buffer *record_buffer = buffer; // We pass in the original buffer as the record_buffer, but not as the regular buffer - int extra_frames_in_record_buffer = 1; - sample_thread_internal(thread, &thread_in_gc_buffer, recorder_instance, values, labels, record_buffer, extra_frames_in_record_buffer); + record_placeholder_stack( + buffer, + recorder_instance, + values, + labels, + record_buffer, + extra_frames_in_record_buffer, + (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("Garbage Collection")} + ); return; } diff --git a/spec/datadog/profiling/collectors/stack_spec.rb b/spec/datadog/profiling/collectors/stack_spec.rb index aee41a3f3f5..38c851cbe0d 100644 --- a/spec/datadog/profiling/collectors/stack_spec.rb +++ b/spec/datadog/profiling/collectors/stack_spec.rb @@ -60,14 +60,8 @@ def sample(thread, recorder_instance, metric_values_hash, labels_array, max_fram context 'when marking sample as being in garbage collection' do let(:in_gc) { true } - it 'includes a placeholder frame for garbage collection' do - expect(stacks.fetch(:gathered)[0]).to have_attributes(base_label: '', path: 'Garbage Collection', lineno: 0) - end - - it 'matches the Ruby backtrace API' do - # We skip 4 frames here -- the garbage collection placeholder, as well as the 3 top stacks that differ from the - # reference stack (see the `let(:gathered_stack)` above for details) - expect(stacks.fetch(:gathered)[4..-1]).to eq reference_stack + it 'gathers a one-element stack with a "Garbage Collection" placeholder' do + expect(stacks.fetch(:gathered)).to contain_exactly(have_attributes(base_label: '', path: 'Garbage Collection', lineno: 0)) end end end @@ -491,38 +485,6 @@ def 
call_sleep expect(gathered_stack).to eq reference_stack end end - - context 'when marking sample as being in garbage collection' do - let(:in_gc) { true } - - it 'gathers exactly max_frames frames' do - expect(gathered_stack.size).to be max_frames - end - - it 'matches the Ruby backtrace API, up to max_frames - 2' do - garbage_collection = 1 - expect(gathered_stack[(0 + garbage_collection)...(max_frames - 1)]).to eq reference_stack[0...(max_frames - 1 - garbage_collection)] - end - - it 'includes two placeholder frames: one for garbage collection and another for including the number of skipped frames' do - garbage_collection = 1 - placeholder = 1 - omitted_frames = target_stack_depth - max_frames + placeholder + garbage_collection - - expect(omitted_frames).to be 97 - expect(gathered_stack.last).to have_attributes(base_label: '', path: '97 frames omitted', lineno: 0) - expect(gathered_stack.first).to have_attributes(base_label: '', path: 'Garbage Collection', lineno: 0) - end - - context 'when stack is exactly one item less as deep as the configured max_frames' do - let(:target_stack_depth) { 4 } - - it 'includes a placeholder frame for garbage collection and matches the Ruby backtrace API' do - garbage_collection = 1 - expect(gathered_stack[(0 + garbage_collection)..-1]).to eq reference_stack - end - end - end end context 'when sampling a dead thread' do @@ -600,11 +562,8 @@ def call_sleep context 'when marking sample as being in garbage collection' do let(:in_gc) { true } - it 'gathers a two-element stack with a placeholder for "In native code" and another for garbage collection' do - expect(gathered_stack).to contain_exactly( - have_attributes(base_label: '', path: 'Garbage Collection', lineno: 0), - have_attributes(base_label: '', path: 'In native code', lineno: 0), - ) + it 'gathers a one-element stack with a "Garbage Collection" placeholder' do + expect(stacks.fetch(:gathered)).to contain_exactly(have_attributes(base_label: '', path: 'Garbage Collection', 
lineno: 0)) end end end From 1f5926bb1f20b18e5ee21fcd4234e702e655c94b Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Dec 2023 10:44:50 +0000 Subject: [PATCH 03/21] Remove record_buffer/extra_frames_in_record_buffer abstraction These abstractions added some complexity to the stack collector, and were only needed to allow us to put extra placeholder frames on top of a true collected stack. Since this was only being used by the GC placeholder feature, and we've changed that to not collect a stack anymore, we can clean up this mechanism and simplify the code. --- .../collectors_stack.c | 37 +++---------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index 1b70e069047..2477f7f7fa2 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -41,8 +41,6 @@ static void record_placeholder_stack( VALUE recorder_instance, sample_values values, sample_labels labels, - sampling_buffer *record_buffer, - int extra_frames_in_record_buffer, ddog_prof_Function placeholder_stack ); static void sample_thread_internal( @@ -50,9 +48,7 @@ static void sample_thread_internal( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - sample_labels labels, - sampling_buffer *record_buffer, - int extra_frames_in_record_buffer + sample_labels labels ); void collectors_stack_init(VALUE profiling_module) { @@ -145,24 +141,19 @@ void sample_thread( sample_labels labels, sample_type type ) { - sampling_buffer *record_buffer = buffer; - int extra_frames_in_record_buffer = 0; - // Samples thread into recorder if (type == SAMPLE_REGULAR) { - sample_thread_internal(thread, buffer, recorder_instance, values, labels, record_buffer, extra_frames_in_record_buffer); + sample_thread_internal(thread, buffer, recorder_instance, values, labels); return; } - // Samples thread 
into recorder, including as a top frame in the stack a frame named "Garbage Collection" + // Skips sampling the stack -- records only a "Garbage Collection" frame and the values/labels if (type == SAMPLE_IN_GC) { record_placeholder_stack( buffer, recorder_instance, values, labels, - record_buffer, - extra_frames_in_record_buffer, (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("Garbage Collection")} ); return; @@ -182,24 +173,12 @@ void sample_thread( // * Should we move this into a different thread entirely? // * If we don't move it into a different thread, does releasing the GVL on a Ruby thread mean that we're introducing // a new thread switch point where there previously was none? -// -// --- -// -// Why the weird extra record_buffer and extra_frames_in_record_buffer? -// The answer is: to support both sample_thread() and sample_thread_in_gc(). -// -// For sample_thread(), buffer == record_buffer and extra_frames_in_record_buffer == 0, so it's a no-op. -// For sample_thread_in_gc(), the buffer is a special buffer that is the same as the record_buffer, but with every -// pointer shifted forward extra_frames_in_record_buffer elements, so that the caller can actually inject those extra -// frames, and this function doesn't have to care about it. 
static void sample_thread_internal( VALUE thread, sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - sample_labels labels, - sampling_buffer *record_buffer, - int extra_frames_in_record_buffer + sample_labels labels ) { int captured_frames = ddtrace_rb_profile_frames( thread, @@ -216,8 +195,6 @@ static void sample_thread_internal( recorder_instance, values, labels, - record_buffer, - extra_frames_in_record_buffer, (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("In native code")} ); return; @@ -326,7 +303,7 @@ static void sample_thread_internal( record_sample( recorder_instance, - (ddog_prof_Slice_Location) {.ptr = record_buffer->locations, .len = captured_frames + extra_frames_in_record_buffer}, + (ddog_prof_Slice_Location) {.ptr = buffer->locations, .len = captured_frames}, values, labels ); @@ -378,15 +355,13 @@ static void record_placeholder_stack( VALUE recorder_instance, sample_values values, sample_labels labels, - sampling_buffer *record_buffer, - int extra_frames_in_record_buffer, ddog_prof_Function placeholder_stack ) { buffer->locations[0] = (ddog_prof_Location) {.function = placeholder_stack, .line = 0}; record_sample( recorder_instance, - (ddog_prof_Slice_Location) {.ptr = record_buffer->locations, .len = 1 + extra_frames_in_record_buffer}, + (ddog_prof_Slice_Location) {.ptr = buffer->locations, .len = 1}, values, labels ); From 6f922aa6e701c9eea39e8e7488371041a2c7b576 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Dec 2023 10:49:42 +0000 Subject: [PATCH 04/21] Extract `record_placeholder_stack_in_native_code` again This function is rather trivial BUT we have a really nice big comment to explain it so it's useful to extract it so we can have the comment separate, instead of moving the big comment inside the already very-complex `sample_thread_internal`. 
--- .../collectors_stack.c | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index 2477f7f7fa2..029e4d20c7e 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -36,6 +36,7 @@ static VALUE _native_sample( VALUE in_gc ); static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer* buffer, char *frames_omitted_message, int frames_omitted_message_size); +static void record_placeholder_stack_in_native_code(sampling_buffer* buffer, VALUE recorder_instance, sample_values values, sample_labels labels); static void record_placeholder_stack( sampling_buffer* buffer, VALUE recorder_instance, @@ -190,13 +191,7 @@ static void sample_thread_internal( ); if (captured_frames == PLACEHOLDER_STACK_IN_NATIVE_CODE) { - record_placeholder_stack( - buffer, - recorder_instance, - values, - labels, - (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("In native code")} - ); + record_placeholder_stack_in_native_code(buffer, recorder_instance, values, labels); return; } @@ -350,6 +345,22 @@ static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer* // // To give customers visibility into these threads, rather than reporting an empty stack, we replace the empty stack // with one containing a placeholder frame, so that these threads are properly represented in the UX. 
+ +static void record_placeholder_stack_in_native_code( + sampling_buffer* buffer, + VALUE recorder_instance, + sample_values values, + sample_labels labels +) { + record_placeholder_stack( + buffer, + recorder_instance, + values, + labels, + (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("In native code")} + ); +} + static void record_placeholder_stack( sampling_buffer* buffer, VALUE recorder_instance, From 7ce8d90a504b5c6e5372196f89ff27f4c5d23156 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Dec 2023 15:49:39 +0000 Subject: [PATCH 05/21] Remove old comment While still accurate, I don't think this comment is still relevant -- we no longer have the skid issue, and the rest is just historical context that I don't think is that interesting to keep around. --- .../collectors_cpu_and_wall_time_worker.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index af9b0127e18..500f3c098c8 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -718,22 +718,8 @@ static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) { if (event == RUBY_INTERNAL_EVENT_GC_ENTER) { thread_context_collector_on_gc_start(state->thread_context_collector_instance); } else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) { - // Design: In an earlier iteration of this feature (see https://github.com/DataDog/dd-trace-rb/pull/2308) we - // actually had a single method to implement the behavior of both thread_context_collector_on_gc_finish - // and thread_context_collector_sample_after_gc (the latter is called via after_gc_from_postponed_job). 
- // - // Unfortunately, then we discovered the safety issue around no allocations, and thus decided to separate them -- so that - // the sampling could run outside the tight safety constraints of the garbage collection process. - // - // There is a downside: The sample is now taken very very shortly afterwards the GC finishes, and not immediately - // as the GC finishes, which means the stack captured may by affected by "skid", e.g. point slightly after where - // it should be pointing at. - // Alternatives to solve this would be to capture no stack for garbage collection (as we do for Java and .net); - // making the sampling process allocation-safe (very hard); or separate stack sampling from sample recording, - // e.g. enabling us to capture the stack in thread_context_collector_on_gc_finish and do the rest later - // (medium hard). - thread_context_collector_on_gc_finish(state->thread_context_collector_instance); + // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after if // fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual. // From c3765681c205c6d5cc9d04373f95003050f94c25 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Dec 2023 18:28:58 +0000 Subject: [PATCH 06/21] Add timeline event type to stack recorder This event type will be used to report GC information for use in the profiling timeline. 
--- benchmarks/profiler_sample_loop_v2.rb | 3 +- .../collectors_stack.c | 1 + .../stack_recorder.c | 54 +++++++-- .../stack_recorder.h | 1 + lib/datadog/profiling/component.rb | 13 +- lib/datadog/profiling/stack_recorder.rb | 10 +- sig/datadog/profiling/component.rbs | 5 +- sig/datadog/profiling/stack_recorder.rbs | 3 +- spec/datadog/profiling/component_spec.rb | 30 ++++- spec/datadog/profiling/spec_helper.rb | 5 +- spec/datadog/profiling/stack_recorder_spec.rb | 112 +++++++++--------- 11 files changed, 158 insertions(+), 79 deletions(-) diff --git a/benchmarks/profiler_sample_loop_v2.rb b/benchmarks/profiler_sample_loop_v2.rb index 4722a6c050f..43a9e85e148 100644 --- a/benchmarks/profiler_sample_loop_v2.rb +++ b/benchmarks/profiler_sample_loop_v2.rb @@ -19,7 +19,8 @@ def create_profiler @recorder = Datadog::Profiling::StackRecorder.new( cpu_time_enabled: true, alloc_samples_enabled: false, - heap_samples_enabled: false + heap_samples_enabled: false, + timeline_enabled: false, ) @collector = Datadog::Profiling::Collectors::ThreadContext.new( recorder: @recorder, max_frames: 400, tracer: nil, endpoint_collection_enabled: false, timeline_enabled: false diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index 029e4d20c7e..21cb2d26c4b 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -86,6 +86,7 @@ static VALUE _native_sample( .cpu_or_wall_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("cpu-samples"), zero)), .wall_time_ns = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("wall-time"), zero)), .alloc_samples = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("alloc-samples"), zero)), + .timeline_wall_time_ns = NUM2UINT(rb_hash_lookup2(metric_values_hash, rb_str_new_cstr("timeline"), zero)), }; long labels_count = RARRAY_LEN(labels_array) + 
RARRAY_LEN(numeric_labels_array); diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index 4918f8caea8..55ac0b25a1a 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -151,15 +151,19 @@ static VALUE error_symbol = Qnil; // :error in Ruby #define WALL_TIME_VALUE_ID 2 #define ALLOC_SAMPLES_VALUE {.type_ = VALUE_STRING("alloc-samples"), .unit = VALUE_STRING("count")} #define ALLOC_SAMPLES_VALUE_ID 3 -#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")} +#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")} #define HEAP_SAMPLES_VALUE_ID 4 +#define TIMELINE_VALUE {.type_ = VALUE_STRING("timeline"), .unit = VALUE_STRING("nanoseconds")} +#define TIMELINE_VALUE_ID 5 -static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE}; +static const ddog_prof_ValueType all_value_types[] = + {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, TIMELINE_VALUE}; // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it // occupies on the all_value_types array. // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types. 
-static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID}; +static const uint8_t all_value_types_positions[] = + {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, TIMELINE_VALUE_ID}; #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType)) @@ -203,7 +207,14 @@ static VALUE _native_new(VALUE klass); static void initialize_slot_concurrency_control(struct stack_recorder_state *state); static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types); static void stack_recorder_typed_data_free(void *data); -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled); +static VALUE _native_initialize( + DDTRACE_UNUSED VALUE _self, + VALUE recorder_instance, + VALUE cpu_time_enabled, + VALUE alloc_samples_enabled, + VALUE heap_samples_enabled, + VALUE timeline_enabled +); static VALUE _native_serialize(VALUE self, VALUE recorder_instance); static VALUE ruby_time_from(ddog_Timespec ddprof_time); static void *call_serialize_without_gvl(void *call_args); @@ -237,7 +248,7 @@ void stack_recorder_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. 
rb_define_alloc_func(stack_recorder_class, _native_new); - rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 4); + rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 5); rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1); rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1); rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1); @@ -338,28 +349,39 @@ static void stack_recorder_typed_data_free(void *state_ptr) { ruby_xfree(state); } -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled) { +static VALUE _native_initialize( + DDTRACE_UNUSED VALUE _self, + VALUE recorder_instance, + VALUE cpu_time_enabled, + VALUE alloc_samples_enabled, + VALUE heap_samples_enabled, + VALUE timeline_enabled +) { ENFORCE_BOOLEAN(cpu_time_enabled); ENFORCE_BOOLEAN(alloc_samples_enabled); ENFORCE_BOOLEAN(heap_samples_enabled); + ENFORCE_BOOLEAN(timeline_enabled); struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue && heap_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default + uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT - + (cpu_time_enabled == Qtrue ? 0 : 1) - + (alloc_samples_enabled == Qtrue? 0 : 1) - + (heap_samples_enabled == Qtrue ? 0 : 1) - + (timeline_enabled == Qtrue ? 
0 : 1); + + if (requested_values_count == ALL_VALUE_TYPES_COUNT) return Qtrue; // Nothing to do, this is the default // When some sample types are disabled, we need to reconfigure libdatadog to record less types, // as well as reconfigure the position_for array to push the disabled types to the end so they don't get recorded. // See record_sample for details on the use of position_for. - state->enabled_values_count = ALL_VALUE_TYPES_COUNT - - (cpu_time_enabled == Qtrue ? 0 : 1) - - (alloc_samples_enabled == Qtrue? 0 : 1) - - (heap_samples_enabled == Qtrue ? 0 : 1); + state->enabled_values_count = requested_values_count; ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT]; uint8_t next_enabled_pos = 0; - uint8_t next_disabled_pos = state->enabled_values_count; + uint8_t next_disabled_pos = requested_values_count; // CPU_SAMPLES_VALUE is always enabled enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) CPU_SAMPLES_VALUE; @@ -395,6 +417,13 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta state->heap_recorder = NULL; } + if (timeline_enabled == Qtrue) { + enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) TIMELINE_VALUE; + state->position_for[TIMELINE_VALUE_ID] = next_enabled_pos++; + } else { + state->position_for[TIMELINE_VALUE_ID] = next_disabled_pos++; + } + ddog_prof_Profile_drop(&state->slot_one_profile); ddog_prof_Profile_drop(&state->slot_two_profile); @@ -476,6 +505,7 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, metric_values[position_for[CPU_SAMPLES_VALUE_ID]] = values.cpu_or_wall_samples; metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns; metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; + metric_values[position_for[TIMELINE_VALUE_ID]] = values.timeline_wall_time_ns; if (values.alloc_samples != 0) { // If we got an allocation sample end the heap allocation recording to commit the heap sample. 
diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index 9190cc89a25..4a9f2ead7b2 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -8,6 +8,7 @@ typedef struct { int64_t wall_time_ns; uint32_t cpu_or_wall_samples; uint32_t alloc_samples; + int64_t timeline_wall_time_ns; } sample_values; typedef struct sample_labels { diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 228fd01f7a9..d0083a583f4 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -48,7 +48,11 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) overhead_target_percentage = valid_overhead_target(settings.profiling.advanced.overhead_target_percentage) upload_period_seconds = [60, settings.profiling.advanced.upload_period_seconds].max - recorder = build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + recorder = build_recorder( + allocation_profiling_enabled: allocation_profiling_enabled, + heap_profiling_enabled: heap_profiling_enabled, + timeline_enabled: timeline_enabled, + ) thread_context_collector = build_thread_context_collector(settings, recorder, optional_tracer, timeline_enabled) worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new( gc_profiling_enabled: enable_gc_profiling?(settings), @@ -72,11 +76,16 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) Profiling::Profiler.new(worker: worker, scheduler: scheduler) end - private_class_method def self.build_recorder(allocation_profiling_enabled, heap_profiling_enabled) + private_class_method def self.build_recorder( + allocation_profiling_enabled:, + heap_profiling_enabled:, + timeline_enabled: + ) Datadog::Profiling::StackRecorder.new( cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently 
alloc_samples_enabled: allocation_profiling_enabled, heap_samples_enabled: heap_profiling_enabled, + timeline_enabled: timeline_enabled, ) end diff --git a/lib/datadog/profiling/stack_recorder.rb b/lib/datadog/profiling/stack_recorder.rb index 3153a4982c5..6337d3ec588 100644 --- a/lib/datadog/profiling/stack_recorder.rb +++ b/lib/datadog/profiling/stack_recorder.rb @@ -4,7 +4,7 @@ module Profiling # Note that `record_sample` is only accessible from native code. # Methods prefixed with _native_ are implemented in `stack_recorder.c` class StackRecorder - def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:) + def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:, timeline_enabled:) # This mutex works in addition to the fancy C-level mutexes we have in the native side (see the docs there). # It prevents multiple Ruby threads calling serialize at the same time -- something like # `10.times { Thread.new { stack_recorder.serialize } }`. @@ -13,7 +13,13 @@ def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:) # accidentally happening. @no_concurrent_synchronize_mutex = Mutex.new - self.class._native_initialize(self, cpu_time_enabled, alloc_samples_enabled, heap_samples_enabled) + self.class._native_initialize( + self, + cpu_time_enabled, + alloc_samples_enabled, + heap_samples_enabled, + timeline_enabled, + ) end def serialize diff --git a/sig/datadog/profiling/component.rbs b/sig/datadog/profiling/component.rbs index 2b32dbe88fe..5de8b61eb6c 100644 --- a/sig/datadog/profiling/component.rbs +++ b/sig/datadog/profiling/component.rbs @@ -8,8 +8,9 @@ module Datadog ) -> Datadog::Profiling::Profiler? 
def self.build_recorder: ( - bool allocation_profiling_enabled, - bool heap_profiling_enabled, + allocation_profiling_enabled: bool, + heap_profiling_enabled: bool, + timeline_enabled: bool, ) -> Datadog::Profiling::StackRecorder def self.build_thread_context_collector: ( diff --git a/sig/datadog/profiling/stack_recorder.rbs b/sig/datadog/profiling/stack_recorder.rbs index 0f61f9c8ab9..f0681354e95 100644 --- a/sig/datadog/profiling/stack_recorder.rbs +++ b/sig/datadog/profiling/stack_recorder.rbs @@ -3,13 +3,14 @@ module Datadog class StackRecorder @no_concurrent_synchronize_mutex: ::Thread::Mutex - def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool, heap_samples_enabled: bool) -> void + def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool, heap_samples_enabled: bool, timeline_enabled: bool) -> void def self._native_initialize: ( Datadog::Profiling::StackRecorder recorder_instance, bool cpu_time_enabled, bool alloc_samples_enabled, bool heap_samples_enabled, + bool timeline_enabled, ) -> true def serialize: () -> untyped diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index 77ac09732b4..efd37d42efd 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -53,6 +53,8 @@ context 'when using the new CPU Profiling 2.0 profiler' do it 'initializes a ThreadContext collector' do allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new) + dummy_stack_recorder = instance_double(Datadog::Profiling::StackRecorder, 'dummy_stack_recorder') + allow(Datadog::Profiling::StackRecorder).to receive(:new).and_return(dummy_stack_recorder) expect(settings.profiling.advanced).to receive(:max_frames).and_return(:max_frames_config) expect(settings.profiling.advanced) @@ -61,7 +63,7 @@ .to receive(:enabled).and_return(:endpoint_collection_enabled_config) expect(Datadog::Profiling::Collectors::ThreadContext).to receive(:new).with( - recorder: 
instance_of(Datadog::Profiling::StackRecorder), + recorder: dummy_stack_recorder, max_frames: :max_frames_config, tracer: tracer, endpoint_collection_enabled: :endpoint_collection_enabled_config, @@ -276,6 +278,28 @@ end end + context 'when timeline is enabled' do + before { settings.profiling.advanced.experimental_timeline_enabled = true } + + it 'sets up the StackRecorder with timeline_enabled: true' do + expect(Datadog::Profiling::StackRecorder) + .to receive(:new).with(hash_including(timeline_enabled: true)).and_call_original + + build_profiler_component + end + end + + context 'when timeline is disabled' do + before { settings.profiling.advanced.experimental_timeline_enabled = false } + + it 'sets up the StackRecorder with timeline_enabled: false' do + expect(Datadog::Profiling::StackRecorder) + .to receive(:new).with(hash_including(timeline_enabled: false)).and_call_original + + build_profiler_component + end + end + it 'sets up the Profiler with the CpuAndWallTimeWorker collector' do expect(Datadog::Profiling::Profiler).to receive(:new).with( worker: instance_of(Datadog::Profiling::Collectors::CpuAndWallTimeWorker), @@ -295,11 +319,11 @@ it 'sets up the Exporter internal_metadata with relevant settings' do allow(Datadog::Profiling::Collectors::ThreadContext).to receive(:new) allow(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new) + allow(Datadog::Profiling::StackRecorder).to receive(:new) expect(described_class).to receive(:no_signals_workaround_enabled?).and_return(:no_signals_result) expect(settings.profiling.advanced).to receive(:experimental_timeline_enabled).and_return(:timeline_result) - expect(settings.profiling.advanced).to receive(:experimental_allocation_sample_rate) - .and_return(123) + expect(settings.profiling.advanced).to receive(:experimental_allocation_sample_rate).and_return(123) expect(Datadog::Profiling::Exporter).to receive(:new).with( hash_including( internal_metadata: { diff --git 
a/spec/datadog/profiling/spec_helper.rb b/spec/datadog/profiling/spec_helper.rb index e02c1af213b..fe2de526335 100644 --- a/spec/datadog/profiling/spec_helper.rb +++ b/spec/datadog/profiling/spec_helper.rb @@ -74,11 +74,12 @@ def samples_for_thread(samples, thread) # We disable heap_sample collection by default in tests since it requires some extra mocking/ # setup for it to properly work. - def build_stack_recorder(heap_samples_enabled: false) + def build_stack_recorder(heap_samples_enabled: false, timeline_enabled: false) Datadog::Profiling::StackRecorder.new( cpu_time_enabled: true, alloc_samples_enabled: true, - heap_samples_enabled: heap_samples_enabled + heap_samples_enabled: heap_samples_enabled, + timeline_enabled: timeline_enabled, ) end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 8652ceb5e05..53110b574da 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -10,12 +10,14 @@ # Disabling these by default since they require some extra setup and produce separate samples. # Enabling this is tested in a particular context below. let(:heap_samples_enabled) { false } + let(:timeline_enabled) { true } subject(:stack_recorder) do described_class.new( cpu_time_enabled: cpu_time_enabled, alloc_samples_enabled: alloc_samples_enabled, - heap_samples_enabled: heap_samples_enabled + heap_samples_enabled: heap_samples_enabled, + timeline_enabled: timeline_enabled, ) end @@ -118,77 +120,76 @@ def slot_two_mutex_locked? 
expect(start).to be <= finish end - context 'when all profile types are enabled' do + describe 'profile types configuration' do let(:cpu_time_enabled) { true } let(:alloc_samples_enabled) { true } let(:heap_samples_enabled) { true } - - it 'returns a pprof with the configured sample types' do - expect(sample_types_from(decoded_profile)).to eq( + let(:timeline_enabled) { true } + let(:all_profile_types) do + { 'cpu-time' => 'nanoseconds', 'cpu-samples' => 'count', 'wall-time' => 'nanoseconds', 'alloc-samples' => 'count', 'heap-live-samples' => 'count', - ) + 'timeline' => 'nanoseconds', + } end - end - context 'when cpu-time is disabled' do - let(:cpu_time_enabled) { false } - let(:alloc_samples_enabled) { true } - let(:heap_samples_enabled) { true } + def profile_types_without(type) + all_profile_types.dup.tap { |it| it.delete(type) { raise 'Missing key' } } + end - it 'returns a pprof without the cpu-type type' do - expect(sample_types_from(decoded_profile)).to eq( - 'cpu-samples' => 'count', - 'wall-time' => 'nanoseconds', - 'alloc-samples' => 'count', - 'heap-live-samples' => 'count', - ) + context 'when all profile types are enabled' do + it 'returns a pprof with the configured sample types' do + expect(sample_types_from(decoded_profile)).to eq(all_profile_types) + end end - end - context 'when alloc-samples is disabled' do - let(:cpu_time_enabled) { true } - let(:alloc_samples_enabled) { false } - let(:heap_samples_enabled) { true } + context 'when cpu-time is disabled' do + let(:cpu_time_enabled) { false } - it 'returns a pprof without the alloc-samples type' do - expect(sample_types_from(decoded_profile)).to eq( - 'cpu-time' => 'nanoseconds', - 'cpu-samples' => 'count', - 'wall-time' => 'nanoseconds', - 'heap-live-samples' => 'count', - ) + it 'returns a pprof without the cpu-type type' do + expect(sample_types_from(decoded_profile)).to eq(profile_types_without('cpu-time')) + end end - end - context 'when heap-live-samples is disabled' do - 
let(:cpu_time_enabled) { true } - let(:alloc_samples_enabled) { true } - let(:heap_samples_enabled) { false } + context 'when alloc-samples is disabled' do + let(:alloc_samples_enabled) { false } - it 'returns a pprof without the heap-live-samples type' do - expect(sample_types_from(decoded_profile)).to eq( - 'cpu-time' => 'nanoseconds', - 'cpu-samples' => 'count', - 'wall-time' => 'nanoseconds', - 'alloc-samples' => 'count', - ) + it 'returns a pprof without the alloc-samples type' do + expect(sample_types_from(decoded_profile)).to eq(profile_types_without('alloc-samples')) + end end - end - context 'when all optional types are disabled' do - let(:cpu_time_enabled) { false } - let(:alloc_samples_enabled) { false } - let(:heap_samples_enabled) { false } + context 'when heap-live-samples is disabled' do + let(:heap_samples_enabled) { false } - it 'returns a pprof without the optional types' do - expect(sample_types_from(decoded_profile)).to eq( - 'cpu-samples' => 'count', - 'wall-time' => 'nanoseconds', - ) + it 'returns a pprof without the heap-live-samples type' do + expect(sample_types_from(decoded_profile)).to eq(profile_types_without('heap-live-samples')) + end + end + + context 'when timeline is disabled' do + let(:timeline_enabled) { false } + + it 'returns a pprof without the timeline type' do + expect(sample_types_from(decoded_profile)).to eq(profile_types_without('timeline')) + end + end + + context 'when all optional types are disabled' do + let(:cpu_time_enabled) { false } + let(:alloc_samples_enabled) { false } + let(:heap_samples_enabled) { false } + let(:timeline_enabled) { false } + + it 'returns a pprof without the optional types' do + expect(sample_types_from(decoded_profile)).to eq( + 'cpu-samples' => 'count', + 'wall-time' => 'nanoseconds', + ) + end end end @@ -214,7 +215,9 @@ def sample_types_from(decoded_profile) end context 'when profile has a sample' do - let(:metric_values) { { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789, 
'alloc-samples' => 4242 } } + let(:metric_values) do + { 'cpu-time' => 123, 'cpu-samples' => 456, 'wall-time' => 789, 'alloc-samples' => 4242, 'timeline' => 1111 } + end let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:samples) { samples_from_pprof(encoded_pprof) } @@ -232,6 +235,7 @@ def sample_types_from(decoded_profile) :'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242, + :timeline => 1111, ) end @@ -240,7 +244,7 @@ def sample_types_from(decoded_profile) it 'encodes the sample with the metrics provided, ignoring the disabled ones' do expect(samples.first.values) - .to eq(:'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242) + .to eq(:'cpu-samples' => 456, :'wall-time' => 789, :'alloc-samples' => 4242, :timeline => 1111) end end From 14d978916e8ba75d7e32dbe282c42c1d7e4b07a6 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 8 Dec 2023 09:53:43 +0000 Subject: [PATCH 07/21] Introduce gc profiling helper This helper takes care of mapping `rb_gc_latest_gc_info` into metadata that gets added to GC samples. This helper will get tested via the thread context specs (changes upcoming). 
--- .../collectors_gc_profiling_helper.c | 153 ++++++++++++++++++ .../collectors_gc_profiling_helper.h | 5 + 2 files changed, 158 insertions(+) create mode 100644 ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c create mode 100644 ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h diff --git a/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c b/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c new file mode 100644 index 00000000000..bbecbfc3580 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c @@ -0,0 +1,153 @@ +#include +#include + +#include "collectors_gc_profiling_helper.h" + +// This helper is used by the Datadog::Profiling::Collectors::ThreadContext to profile garbage collection. +// It's tested through that class' interfaces. +// --- + +// Used when retrieving GC information from the VM. +// All these are symbols, but we don't need to mark them since we ask for them to be interned (and thus live forever) +static VALUE state_sym; +static VALUE marking_sym; +static VALUE sweeping_sym; +static VALUE none_sym; +static VALUE gc_by_sym; +static VALUE newobj_sym; +static VALUE malloc_sym; +static VALUE method_sym; +static VALUE capi_sym; +static VALUE stress_sym; +static VALUE major_by_sym; +static VALUE nofree_sym; +static VALUE oldgen_sym; +static VALUE shady_sym; +static VALUE force_sym; +static VALUE oldmalloc_sym; + +static ddog_CharSlice major_gc_reason_pretty(VALUE major_gc_reason); +static ddog_CharSlice gc_cause_pretty(VALUE gc_cause); +static ddog_CharSlice gc_type_pretty(VALUE major_gc_reason, VALUE gc_state); + +void gc_profiling_init(void) { + // This function lazy-interns a few constants, which may trigger allocations. Since we want to call it during GC as + // well, when allocations are not allowed, we call it once here so that the constants get defined ahead of time. 
+ rb_gc_latest_gc_info(rb_hash_new()); + + // Used to query and look up the results of GC information + state_sym = ID2SYM(rb_intern_const("state")); + marking_sym = ID2SYM(rb_intern_const("marking")); + sweeping_sym = ID2SYM(rb_intern_const("sweeping")); + none_sym = ID2SYM(rb_intern_const("none")); + gc_by_sym = ID2SYM(rb_intern_const("gc_by")); + newobj_sym = ID2SYM(rb_intern_const("newobj")); + malloc_sym = ID2SYM(rb_intern_const("malloc")); + method_sym = ID2SYM(rb_intern_const("method")); + capi_sym = ID2SYM(rb_intern_const("capi")); + stress_sym = ID2SYM(rb_intern_const("stress")); + major_by_sym = ID2SYM(rb_intern_const("major_by")); + nofree_sym = ID2SYM(rb_intern_const("nofree")); + oldgen_sym = ID2SYM(rb_intern_const("oldgen")); + shady_sym = ID2SYM(rb_intern_const("shady")); + force_sym = ID2SYM(rb_intern_const("force")); + oldmalloc_sym = ID2SYM(rb_intern_const("oldmalloc")); + state_sym = ID2SYM(rb_intern_const("state")); + none_sym = ID2SYM(rb_intern_const("none")); +} + +bool gc_profiling_has_major_gc_finished(void) { + return rb_gc_latest_gc_info(state_sym) == none_sym && rb_gc_latest_gc_info(major_by_sym) != Qnil; +} + +uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { + uint8_t max_label_count = + 1 + // thread id + 1 + // thread name + 1 + // state + 1 + // event + 1 + // gc reason + 1 + // gc cause + 1; // gc type + + if (max_label_count > labels_length) { + rb_raise( + rb_eArgError, + "BUG: gc_profiling_set_metadata called with a ddog_prof_Label array with length %d, needed length at least %d", + labels_length, + max_label_count + ); + } + + uint8_t label_pos = 0; + + labels[label_pos++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("thread id"), + .str = DDOG_CHARSLICE_C("GC"), + }; + + labels[label_pos++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("thread name"), + .str = DDOG_CHARSLICE_C("Garbage Collection"), + }; + + labels[label_pos++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("state"), + .str = 
DDOG_CHARSLICE_C("had cpu"), + }; + + labels[label_pos++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("event"), + .str = DDOG_CHARSLICE_C("gc"), + }; + + VALUE major_by = rb_gc_latest_gc_info(major_by_sym); + if (major_by != Qnil) { + labels[label_pos++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("gc reason"), + .str = major_gc_reason_pretty(major_by), + }; + } + + labels[label_pos++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("gc cause"), + .str = gc_cause_pretty(rb_gc_latest_gc_info(gc_by_sym)), + }; + + labels[label_pos++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("gc type"), + .str = gc_type_pretty(major_by, rb_gc_latest_gc_info(state_sym)), + }; + + return label_pos; +} + +static ddog_CharSlice major_gc_reason_pretty(VALUE major_gc_reason) { + if (major_gc_reason == nofree_sym ) return DDOG_CHARSLICE_C("not enough free slots (NOFREE)"); + if (major_gc_reason == oldgen_sym ) return DDOG_CHARSLICE_C("old generation full (OLDGEN)"); + if (major_gc_reason == shady_sym ) return DDOG_CHARSLICE_C("too many objects without write barriers (SHADY)"); + if (major_gc_reason == force_sym ) return DDOG_CHARSLICE_C("requested (FORCE)"); + if (major_gc_reason == oldmalloc_sym) return DDOG_CHARSLICE_C("heap bytes allocated threshold (OLDMALLOC)"); + return DDOG_CHARSLICE_C("unknown"); +} + +static ddog_CharSlice gc_cause_pretty(VALUE gc_cause) { + if (gc_cause == newobj_sym) return DDOG_CHARSLICE_C("object allocation"); + if (gc_cause == malloc_sym) return DDOG_CHARSLICE_C("malloc()"); + if (gc_cause == method_sym) return DDOG_CHARSLICE_C("GC.start()"); + if (gc_cause == capi_sym ) return DDOG_CHARSLICE_C("rb_gc()"); + if (gc_cause == stress_sym) return DDOG_CHARSLICE_C("stress"); + return DDOG_CHARSLICE_C("unknown"); +} + +static ddog_CharSlice gc_type_pretty(VALUE major_gc_reason, VALUE gc_state) { + if (major_gc_reason != Qnil) { + if (gc_state == marking_sym ) return DDOG_CHARSLICE_C("major (ongoing, marking)"); + if (gc_state == sweeping_sym) return 
DDOG_CHARSLICE_C("major (ongoing, sweeping)"); + return DDOG_CHARSLICE_C("major"); + } else { + // As we delay flushing events when a minor GC finishes, it's not relevant to include the observed state of the + // minor GC, as we often won't record a marking -> sweeping -> done cycle, as it happens too quickly. + return DDOG_CHARSLICE_C("minor"); + } +} diff --git a/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h b/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h new file mode 100644 index 00000000000..7a5096624c6 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.h @@ -0,0 +1,5 @@ +#pragma once + +void gc_profiling_init(void); +bool gc_profiling_has_major_gc_finished(void); +uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length); From c64d78af607c7b7839e7916f91f35e8b7f1c0782 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 8 Dec 2023 09:55:35 +0000 Subject: [PATCH 08/21] Minor: Add long_max_of/long_min_of helpers --- ext/ddtrace_profiling_native_extension/helpers.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/ddtrace_profiling_native_extension/helpers.h b/ext/ddtrace_profiling_native_extension/helpers.h index f06c96710ec..d71a513de9f 100644 --- a/ext/ddtrace_profiling_native_extension/helpers.h +++ b/ext/ddtrace_profiling_native_extension/helpers.h @@ -15,3 +15,5 @@ // don't like C and I just implemented this as a function. inline static uint64_t uint64_max_of(uint64_t a, uint64_t b) { return a > b ? a : b; } inline static uint64_t uint64_min_of(uint64_t a, uint64_t b) { return a > b ? b : a; } +inline static long long_max_of(long a, long b) { return a > b ? a : b; } +inline static long long_min_of(long a, long b) { return a > b ? 
b : a; } From 0ac923ac4186b6bb42c4ceda652830e87f74adde Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 8 Dec 2023 13:47:17 +0000 Subject: [PATCH 09/21] [NO-TICKET] Redesign GC profiling to add timeline support and reduce overhead **What does this PR do?** This PR redesigns the garbage collection profiling feature to add support for timeline and reduce its overhead. A TL;DR description of how it works now is below: 1. Wiring it up * Ruby exposes native-level tracepoint events for when it starts/stops doing GC (`RUBY_INTERNAL_EVENT_GC_ENTER`/`RUBY_INTERNAL_EVENT_GC_EXIT`). * Inside the `CpuAndWallTimeWorker` we create a tracepoint that listens for these events and calls the `on_gc_start` and `on_gc_finish` events on the `ThreadContext` collector. * The `RUBY_INTERNAL_EVENT_GC_ENTER`/`RUBY_INTERNAL_EVENT_GC_EXIT` tracepoint events are emitted while Ruby is doing GC, which means we can't allocate anything. The code we use is thus built to not allocate anything. * We can't do all the work we want in `on_gc_finish` because we can't allocate anything, so that function returns a flag that tells the `CpuAndWallTimeWorker` to call `sample_after_gc` as soon as the GC finishes. 2. Tracking * When a thread starts doing GC (`on_gc_start`), we record its current cpu-time and wall-time. * When a thread stops doing GC (`on_gc_finish`), we record how much cpu-time and wall-time elapsed. * When `sample_after_gc` gets called, we turn the data recorded above + some metadata from Ruby into a sample event to represent GC as a fake Garbage Collection thread. 3. Overhead * To avoid impacting applications a lot, the Ruby GC works incrementally. That is, rather than doing a full GC (marking, sweeping) in a single step, Ruby breaks this into smaller steps. * We get told about every smaller step: we get a pair of `on_gc_start` /`on_gc_finish` calls for each step. * In the past, we always called `sample_after_gc` after every `on_gc_finish` to record the GC. 
This was a big source of overhead: in a sample application that allocates as much as possible in a loop I was able to observe more than 50k calls to `on_gc_start`/`on_gc_finish` per second, which obviously would represent a lot of overhead if we create one GC sample for each of these. * To avoid this overhead, we coalesce/accumulate the cpu/wall-time from multiple calls to `on_gc_start`/`on_gc_finish` and only create a sample to represent this data from time to time (default is every 10ms or every full GC cycle, whatever happens first). Compared to the old implementation, here's what changed: * Garbage collection events get attributed to a placeholder "Garbage Collection" thread. * We no longer record "Garbage Collection" samples on the thread that triggered GC. (And thus no longer include the stack of that thread when recording GC information). This enabled a nice cleanup of the code in the stack collector * GCs across several threads can be included in the same sample * Timeline support is included * Extra metadata about the GC can be seen in the timeline * Overhead is lowered, even during heavy periods of GC action **Motivation:** I worked on this as part of the Profiler R&D week. I've been wanting to go back and fix the problems with the gc profiling implementation for a long time :) **Additional Notes:** N/A **How to test the change?** Test coverage is included. During development, this tiny benchmark was useful: ```ruby require 'benchmark/ips' require 'ddtrace' puts RUBY_DESCRIPTION Datadog.configure do |c| c.profiling.enabled = true c.profiling.advanced.allocation_counting_enabled = false c.telemetry.enabled = false end class Potato def initialize(val) @potato = val end end Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("DD_PROFILING_FORCE_ENABLE_GC=#{ENV['DD_PROFILING_FORCE_ENABLE_GC']} Potato.new(10)", "Potato.new(10)") x.compare! 
end ``` --- .../collectors_cpu_and_wall_time_worker.c | 9 +- .../collectors_gc_profiling_helper.c | 11 +- .../collectors_thread_context.c | 264 ++++++------ .../collectors_thread_context.h | 3 +- .../cpu_and_wall_time_worker_spec.rb | 24 +- .../collectors/thread_context_spec.rb | 381 ++++++++---------- 6 files changed, 341 insertions(+), 351 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 500f3c098c8..0792e699dff 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -718,14 +718,11 @@ static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) { if (event == RUBY_INTERNAL_EVENT_GC_ENTER) { thread_context_collector_on_gc_start(state->thread_context_collector_instance); } else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) { - thread_context_collector_on_gc_finish(state->thread_context_collector_instance); + bool should_flush = thread_context_collector_on_gc_finish(state->thread_context_collector_instance); - // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after if + // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after it // fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual. - // - // Note: If we ever want to get rid of rb_postponed_job_register_one, remember not to clobber Ruby exceptions, as - // this function does this helpful job for us now -- https://github.com/ruby/ruby/commit/a98e343d39c4d7bf1e2190b076720f32d9f298b3. 
- rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL); + if (should_flush) rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL); } } diff --git a/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c b/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c index bbecbfc3580..a0b7cdbe376 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +++ b/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c @@ -71,12 +71,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { 1; // gc type if (max_label_count > labels_length) { - rb_raise( - rb_eArgError, - "BUG: gc_profiling_set_metadata called with a ddog_prof_Label array with length %d, needed length at least %d", - labels_length, - max_label_count - ); + rb_raise(rb_eArgError, "BUG: gc_profiling_set_metadata invalid labels_length (%d) < max_label_count (%d)", labels_length, max_label_count); } uint8_t label_pos = 0; @@ -119,6 +114,10 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { .str = gc_type_pretty(major_by, rb_gc_latest_gc_info(state_sym)), }; + if (label_pos > max_label_count) { + rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + } + return label_pos; } diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 618cdb23f5d..68b6a0683f3 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -3,6 +3,7 @@ #include "collectors_thread_context.h" #include "clock_id.h" #include "collectors_stack.h" +#include "collectors_gc_profiling_helper.h" #include "helpers.h" #include "libdatadog_helpers.h" #include "private_vm_api_access.h" @@ -37,24 +38,29 @@ // When 
`thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread // context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`. // -// While these fields are set, regular samples (if any) do not account for any time that passes after these two -// timestamps. +// While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for cpu-time any time that passes +// after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread. +// Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds). // -// (Regular samples can still account for the time between the previous sample and the start of GC.) +// (Regular samples can still account for the cpu-time between the previous sample and the start of GC.) // -// When `thread_context_collector_on_gc_finish` gets called, the current cpu and wall-time again get recorded to the -// thread context: `cpu_time_at_gc_finish_ns` and `wall_time_at_gc_finish_ns`. +// When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC gets recorded +// into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns` +// of the thread that did GC also gets adjusted to avoid double-accounting.) // -// Finally, when `thread_context_collector_sample_after_gc` gets called, the following happens: +// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded, using the special +// `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder `Garbage Collection` frame. This sample gets +// assigned the cpu-time and wall-time that was recorded between calls to `on_gc_start` and `on_gc_finish`, as well +// as metadata for the last GC. // -// 1. 
A sample gets taken, using the special `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder
-// `Garbage Collection` frame as the latest frame. This sample gets assigned the cpu-time and wall-time period that was
-// recorded between calls to `on_gc_start` and `on_gc_finish`.
-//
-// 2. The thread is no longer marked as being in gc (all gc tracking fields get reset back to `INVALID_TIME`).
-//
-// 3. The `cpu_time_at_previous_sample_ns` and `wall_time_at_previous_sample_ns` get updated with the elapsed time in
-// GC, so that all time is accounted for -- e.g. the next sample will not get "blamed" by time spent in GC.
+// Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
+// so that the application can keep doing user work in between GC steps.
+// The `on_gc_start` / `on_gc_finish` will trigger each time the VM executes these smaller steps, and on a benchmark
+// that executes `Object.new` in a loop, I measured more than 50k of these steps per second (!!).
+// Creating this many events for every GC step is a lot of overhead, so instead `on_gc_finish` coalesces time
+// spent in GC and only flushes it at most every 10 ms/every complete GC collection. This reduces the amount of
+// individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
+// happened in the coalesced period. 
// // In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we // discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a @@ -68,6 +74,7 @@ #define IS_WALL_TIME true #define IS_NOT_WALL_TIME false #define MISSING_TRACER_CONTEXT_KEY 0 +#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10) static ID at_active_span_id; // id of :@active_span in Ruby static ID at_active_trace_id; // id of :@active_trace in Ruby @@ -114,6 +121,14 @@ struct thread_context_collector_state { // See thread_context_collector_on_gc_start for details unsigned int gc_samples_missed_due_to_missing_context; } stats; + + struct { + unsigned long accumulated_cpu_time_ns; + unsigned long accumulated_wall_time_ns; + + long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above + long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid + } gc_tracking; }; // Tracks per-thread state @@ -127,15 +142,10 @@ struct per_thread_context { long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized struct { - // Both of these fields are set by on_gc_start and kept until sample_after_gc is called. + // Both of these fields are set by on_gc_start and kept until on_gc_finish is called. // Outside of this window, they will be INVALID_TIME. long cpu_time_at_start_ns; long wall_time_at_start_ns; - - // Both of these fields are set by on_gc_finish and kept until sample_after_gc is called. - // Outside of this window, they will be INVALID_TIME. 
- long cpu_time_at_finish_ns; - long wall_time_at_finish_ns; } gc_tracking; }; @@ -193,6 +203,7 @@ static VALUE _native_inspect(VALUE self, VALUE collector_instance); static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state); static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash); static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state); +static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state); static void remove_context_for_dead_threads(struct thread_context_collector_state *state); static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument); static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance); @@ -200,13 +211,14 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, static long cpu_time_now_ns(struct per_thread_context *thread_context); static long thread_id_for(VALUE thread); static VALUE _native_stats(VALUE self, VALUE collector_instance); +static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance); static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result); static bool should_collect_resource(VALUE root_span_type); static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance); static VALUE thread_list(struct thread_context_collector_state *state); static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object); static VALUE _native_new_empty_thread(VALUE self); -ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type); +static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type); void collectors_thread_context_init(VALUE profiling_module) { VALUE collectors_module = 
rb_define_module_under(profiling_module, "Collectors"); @@ -235,6 +247,7 @@ void collectors_thread_context_init(VALUE profiling_module) { rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0); rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1); rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1); + rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1); rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0); at_active_span_id = rb_intern_const("@active_span"); @@ -243,6 +256,8 @@ void collectors_thread_context_init(VALUE profiling_module) { at_resource_id = rb_intern_const("@resource"); at_root_span_id = rb_intern_const("@root_span"); at_type_id = rb_intern_const("@type"); + + gc_profiling_init(); } // This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state @@ -320,6 +335,8 @@ static VALUE _native_new(VALUE klass) { state->allocation_type_enabled = true; state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER; state->main_thread = rb_thread_main(); + state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME; + state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0; return TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state); } @@ -465,7 +482,11 @@ void update_metrics_and_sample( long wall_time_elapsed_ns = update_time_since_previous_sample( &thread_context->wall_time_at_previous_sample_ns, current_monotonic_wall_time_ns, - thread_context->gc_tracking.wall_time_at_start_ns, + // We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time + // accounting to change during GC. + // E.g. 
if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing + // GC or not. + INVALID_TIME, IS_WALL_TIME ); @@ -484,7 +505,7 @@ void update_metrics_and_sample( // This function gets called when Ruby is about to start running the Garbage Collector on the current thread. // It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later -// create a stack sample that blames the cpu/wall time spent from now until the end of the garbage collector work. +// create an event including the cpu/wall time spent in garbage collector work. // // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection, // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. @@ -509,27 +530,14 @@ void thread_context_collector_on_gc_start(VALUE self_instance) { return; } - // If these fields are set, there's an existing GC sample that still needs to be written out by `sample_after_gc`. - // - // When can this happen? Because we don't have precise control over when `sample_after_gc` gets called (it will be - // called sometime after GC finishes), there is no way to guarantee that Ruby will not trigger more than one GC cycle - // before we can actually run that method. - // - // We handle this by collapsing multiple GC cycles into one. That is, if the following happens: - // `on_gc_start` (time=0) -> `on_gc_finish` (time=1) -> `on_gc_start` (time=2) -> `on_gc_finish` (time=3) -> `sample_after_gc` - // then we just use time=0 from the first on_gc_start and time=3 from the last on_gc_finish, e.g. we behave as if - // there was a single, longer GC period. 
- if (thread_context->gc_tracking.cpu_time_at_finish_ns != INVALID_TIME && - thread_context->gc_tracking.wall_time_at_finish_ns != INVALID_TIME) return; - - // Here we record the wall-time first and in on_gc_finish we record it second to avoid having wall-time be slightly < cpu-time + // Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context); } // This function gets called when Ruby has finished running the Garbage Collector on the current thread. -// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later -// create a stack sample that blames the cpu/wall time spent from the start of garbage collector work until now. +// It records the cpu/wall-time observed during GC, which will be used to later +// create an event including the cpu/wall time spent from the start of garbage collector work until now. // // Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection, // *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. @@ -537,9 +545,9 @@ void thread_context_collector_on_gc_start(VALUE self_instance) { // // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this. // Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors). 
-void thread_context_collector_on_gc_finish(VALUE self_instance) { +bool thread_context_collector_on_gc_finish(VALUE self_instance) { struct thread_context_collector_state *state; - if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return; + if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false; // This should never fail the the above check passes TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); @@ -547,29 +555,65 @@ void thread_context_collector_on_gc_finish(VALUE self_instance) { // If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for // how often this happens -- see on_gc_start. - if (thread_context == NULL) return; + if (thread_context == NULL) return false; if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME && thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) { // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just // do nothing. 
- return; + return false; } - // Here we record the wall-time second and in on_gc_start we record it first to avoid having wall-time be slightly < cpu-time - thread_context->gc_tracking.cpu_time_at_finish_ns = cpu_time_now_ns(thread_context); - thread_context->gc_tracking.wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); + // Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time + long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context); + long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); + + long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns; + long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns; + + // Mark thread as no longer in GC + thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME; + thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME; + + // If our end timestamp is not OK, we bail out + if (wall_time_at_finish_ns == 0) return false; + + // Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well) + // so let's ensure we don't get negative values for time deltas. 
+  gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
+  gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
+
+  if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
+    state->gc_tracking.accumulated_cpu_time_ns = 0;
+    state->gc_tracking.accumulated_wall_time_ns = 0;
+  }
+
+  state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
+  state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
+  state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
+
+  // Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
+  // We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
+  // `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
+  if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
+    thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
+  }
+
+  // Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
+  // on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
+  // samples first.
+  bool finished_major_gc = gc_profiling_has_major_gc_finished();
+  bool over_flush_time_threshold =
+    (wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
+
+  return finished_major_gc || over_flush_time_threshold;
 }
 
-// This function gets called shortly after Ruby has finished running the Garbage Collector.
+// This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
 // It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
 // GC-related tracking.
// -// Specifically, it will search for thread(s) which have gone through a cycle of on_gc_start/on_gc_finish -// and thus have cpu_time_at_start_ns, cpu_time_at_finish_ns, wall_time_at_start_ns, wall_time_at_finish_ns -// set on their context. -// // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this. // Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed. // Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed. @@ -578,70 +622,41 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { struct thread_context_collector_state *state; TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); - VALUE threads = thread_list(state); - bool sampled_any_thread = false; - - const long thread_count = RARRAY_LEN(threads); - for (long i = 0; i < thread_count; i++) { - VALUE thread = RARRAY_AREF(threads, i); - struct per_thread_context *thread_context = get_or_create_context_for(thread, state); + if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) { + rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available"); + } - if ( - thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME || - thread_context->gc_tracking.cpu_time_at_finish_ns == INVALID_TIME || - thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME || - thread_context->gc_tracking.wall_time_at_finish_ns == INVALID_TIME - ) continue; // Ignore threads with no/incomplete garbage collection data - - sampled_any_thread = true; - - long gc_cpu_time_elapsed_ns = - thread_context->gc_tracking.cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns; - long gc_wall_time_elapsed_ns = - thread_context->gc_tracking.wall_time_at_finish_ns - 
thread_context->gc_tracking.wall_time_at_start_ns; - - // We don't expect non-wall time to go backwards, so let's flag this as a bug - if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples"); - // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards - // was a bug. - // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on - // https://github.com/DataDog/dd-trace-rb/pull/2336. - if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0; - - if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) { - // Avoid using wall-clock if we got 0 for a start (meaning there was an error) but not 0 for finish so we don't - // come up with a crazy value for the frame - rb_raise(rb_eRuntimeError, "BUG: Unexpected zero value for gc_tracking.wall_time_at_start_ns"); - } + int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata + ddog_prof_Label labels[max_labels_needed_for_gc]; + uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc); - trigger_sample_for_thread( - state, - /* thread: */ thread, - /* stack_from_thread: */ thread, - thread_context, - (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns}, - SAMPLE_IN_GC, - INVALID_TIME, // For now we're not collecting timestamps for these events - NULL, - NULL - ); + ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos}; - // Mark thread as no longer in GC - thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME; - thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME; - thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME; - thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME; + // The end_timestamp_ns is 
treated specially by libdatadog and that's why it's not added as a ddog_prof_Label + int64_t end_timestamp_ns = 0; - // Update counters so that they won't include the time in GC during the next sample - if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) { - thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns; - } - if (thread_context->wall_time_at_previous_sample_ns != INVALID_TIME) { - thread_context->wall_time_at_previous_sample_ns += gc_wall_time_elapsed_ns; - } + if (state->timeline_enabled) { + end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns); } - if (sampled_any_thread) state->stats.gc_samples++; + sample_thread( + /* thread: */ Qnil, + /* buffer: */ state->sampling_buffer, + /* recorder_instance: */ state->recorder_instance, + (sample_values) { + .cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns, + .cpu_or_wall_samples = 1, + .wall_time_ns = state->gc_tracking.accumulated_wall_time_ns, + .timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns, + }, + (sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns}, + SAMPLE_IN_GC + ); + + state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns; + state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME; + + state->stats.gc_samples++; // Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2) return Qnil; @@ -874,9 +889,7 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c // These will only be used during a GC operation thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME; - thread_context->gc_tracking.cpu_time_at_finish_ns = INVALID_TIME; thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME; - thread_context->gc_tracking.wall_time_at_finish_ns = INVALID_TIME; } static 
VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) { @@ -901,6 +914,7 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc state->time_converter_state.delta_to_epoch_ns )); rb_str_concat(result, rb_sprintf(" main_thread=%"PRIsVALUE, state->main_thread)); + rb_str_concat(result, rb_sprintf(" gc_tracking=%"PRIsVALUE, gc_tracking_as_ruby_hash(state))); return result; } @@ -927,9 +941,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value ID2SYM(rb_intern("wall_time_at_previous_sample_ns")), /* => */ LONG2NUM(thread_context->wall_time_at_previous_sample_ns), ID2SYM(rb_intern("gc_tracking.cpu_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_start_ns), - ID2SYM(rb_intern("gc_tracking.cpu_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.cpu_time_at_finish_ns), ID2SYM(rb_intern("gc_tracking.wall_time_at_start_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_start_ns), - ID2SYM(rb_intern("gc_tracking.wall_time_at_finish_ns")), /* => */ LONG2NUM(thread_context->gc_tracking.wall_time_at_finish_ns) }; for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(context_as_hash, arguments[i], arguments[i+1]); @@ -947,6 +959,19 @@ static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state) { return stats_as_hash; } +static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state) { + // Update this when modifying state struct (gc_tracking inner struct) + VALUE result = rb_hash_new(); + VALUE arguments[] = { + ID2SYM(rb_intern("accumulated_cpu_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_cpu_time_ns), + ID2SYM(rb_intern("accumulated_wall_time_ns")), /* => */ ULONG2NUM(state->gc_tracking.accumulated_wall_time_ns), + ID2SYM(rb_intern("wall_time_at_previous_gc_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_previous_gc_ns), + 
ID2SYM(rb_intern("wall_time_at_last_flushed_gc_event_ns")), /* => */ LONG2NUM(state->gc_tracking.wall_time_at_last_flushed_gc_event_ns), + }; + for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(result, arguments[i], arguments[i+1]); + return result; +} + static void remove_context_for_dead_threads(struct thread_context_collector_state *state) { st_foreach(state->hash_map_per_thread_context, remove_if_dead_thread, 0 /* unused */); } @@ -1049,8 +1074,6 @@ VALUE enforce_thread_context_collector_instance(VALUE object) { // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. // It SHOULD NOT be used for other purposes. -// -// Returns the whole contents of the per_thread_context structs being tracked. static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) { struct thread_context_collector_state *state; TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); @@ -1058,6 +1081,15 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) return stats_as_ruby_hash(state); } +// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. +// It SHOULD NOT be used for other purposes. +static VALUE _native_gc_tracking(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) { + struct thread_context_collector_state *state; + TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); + + return gc_tracking_as_ruby_hash(state); +} + // Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this. 
static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) { if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return; @@ -1241,7 +1273,7 @@ static VALUE _native_new_empty_thread(DDTRACE_UNUSED VALUE self) { return rb_thread_create(new_empty_thread_inner, NULL); } -ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) { +static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type) { switch (type) { case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("Object"); case(RUBY_T_CLASS ): return DDOG_CHARSLICE_C("Class"); diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.h b/ext/ddtrace_profiling_native_extension/collectors_thread_context.h index 88dfabe9615..6299d96b43e 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.h @@ -1,6 +1,7 @@ #pragma once #include +#include void thread_context_collector_sample( VALUE self_instance, @@ -10,5 +11,5 @@ void thread_context_collector_sample( void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object); VALUE thread_context_collector_sample_after_gc(VALUE self_instance); void thread_context_collector_on_gc_start(VALUE self_instance); -void thread_context_collector_on_gc_finish(VALUE self_instance); +bool thread_context_collector_on_gc_finish(VALUE self_instance); VALUE enforce_thread_context_collector_instance(VALUE object); diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 21888900e7c..9f0a993924d 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -267,17 +267,19 @@ all_samples = samples_from_pprof(recorder.serialize!) 
- current_thread_gc_samples = - samples_for_thread(all_samples, Thread.current) - .select { |it| it.locations.first.path == 'Garbage Collection' } - - # NOTE: In some cases, Ruby may actually call two GC's back-to-back without us having the possibility to take - # a sample. I don't expect this to happen for this test (that's what the `Thread.pass` above is trying to avoid) - # but if this spec turns out to be flaky, that is probably the issue, and that would mean we'd need to relax the - # check. - expect( - current_thread_gc_samples.inject(0) { |sum, sample| sum + sample.values.fetch(:'cpu-samples') } - ).to be >= invoke_gc_times + gc_sample = all_samples.find { |sample| sample.labels[:'gc cause'] == 'GC.start()' } + + expect(gc_sample.values.fetch(:'cpu-samples')).to be >= invoke_gc_times + expect(gc_sample.labels).to match a_hash_including( + :state => 'had cpu', + :'thread id' => 'GC', + :'thread name' => 'Garbage Collection', + :event => 'gc', + :'gc reason' => 'requested (FORCE)', + :'gc cause' => 'GC.start()', + :'gc type' => 'major', + ) + expect(gc_sample.locations.first.path).to eq 'Garbage Collection' end context 'when the background thread dies without cleaning up (after Ruby forks)' do diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index 88e32c6a089..105af8c2900 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -9,7 +9,7 @@ expect(Thread.list).to include(Thread.main, t1, t2, t3) end - let(:recorder) { build_stack_recorder } + let(:recorder) { build_stack_recorder(timeline_enabled: timeline_enabled) } let(:ready_queue) { Queue.new } let(:t1) do Thread.new(ready_queue) do |ready_queue| @@ -91,6 +91,10 @@ def stats described_class::Testing._native_stats(cpu_and_wall_time_collector) end + def gc_tracking + described_class::Testing._native_gc_tracking(cpu_and_wall_time_collector) + end + # 
This method exists only so we can look for its name in the stack trace in a few tests def inside_t1 yield @@ -183,50 +187,25 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current expect(t1_samples.map(&:values).map { |it| it.fetch(:'cpu-samples') }.reduce(:+)).to eq 5 end - [:before, :after].each do |on_gc_finish_order| - context "when a thread is marked as being in garbage collection, #{on_gc_finish_order} on_gc_finish" do - # Until sample_after_gc gets called, the state left over by both on_gc_start and on_gc_finish "blocks" time - # from being assigned to further samples. Note this is expected to be very rare in practice, otherwise we would - # probably want to look into skipping these samples entirely. - it 'records the wall-time between a previous sample and the start of garbage collection, and no further time' do - sample - wall_time_at_first_sample = per_thread_context.fetch(Thread.current).fetch(:wall_time_at_previous_sample_ns) - - on_gc_start - on_gc_finish if on_gc_finish_order == :after - - wall_time_at_gc_start = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.wall_time_at_start_ns') - - # Even though we keep calling sample, the result only includes the time until we called on_gc_start - 5.times { another_way_of_calling_sample } - - total_wall_for_rspec_thread = - samples_for_thread(samples, Thread.current) - .select { |it| it.locations.find { |frame| frame.base_label == 'another_way_of_calling_sample' } } - .map { |it| it.values.fetch(:'wall-time') } - .reduce(:+) - - expect(total_wall_for_rspec_thread).to be(wall_time_at_gc_start - wall_time_at_first_sample) - end + context 'when a thread is marked as being in garbage collection by on_gc_start' do + # @ivoanjo: This spec exists because for cpu-time the behavior is not this one (e.g. we don't keep recording + # cpu-time), and I wanted to validate that the different behavior does not get applied to wall-time. 
+ it 'keeps recording the wall-time after every sample' do + sample + wall_time_at_first_sample = per_thread_context.fetch(Thread.current).fetch(:wall_time_at_previous_sample_ns) - # The whole point of wall_time_at_previous_sample_ns is to track the past point in time that we use as start of - # the time range for a sample. - # BUT, we can't let it advance during GC as it should only get accounted for when GC finishes. - it 'does not advance wall_time_at_previous_sample_ns for the thread beyond gc_tracking.wall_time_at_start_ns' do - sample + on_gc_start - on_gc_start - on_gc_finish if on_gc_finish_order == :after + 5.times { sample } - wall_time_at_gc_start = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.wall_time_at_start_ns') + time_after = Datadog::Core::Utils::Time.get_time(:nanosecond) - 5.times { sample } + sample - wall_time_at_previous_sample_ns = - per_thread_context.fetch(Thread.current).fetch(:wall_time_at_previous_sample_ns) + wall_time_at_last_sample = per_thread_context.fetch(Thread.current).fetch(:wall_time_at_previous_sample_ns) - expect(wall_time_at_previous_sample_ns).to be wall_time_at_gc_start - end + expect(wall_time_at_last_sample).to be >= wall_time_at_first_sample + expect(wall_time_at_last_sample).to be >= time_after end end @@ -267,47 +246,61 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current expect(total_cpu_for_rspec_thread).to be_between(1, rspec_thread_spent_time) end - [:before, :after].each do |on_gc_finish_order| - context "when a thread is marked as being in garbage collection, #{on_gc_finish_order} on_gc_finish" do - it 'records the cpu-time between a previous sample and the start of garbage collection, and no further time' do - sample - cpu_time_at_first_sample = per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) + context 'when a thread is marked as being in garbage collection by on_gc_start' do + it 'records the cpu-time between a previous sample and 
the start of garbage collection, and no further time' do + sample + cpu_time_at_first_sample = per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) - on_gc_start - on_gc_finish if on_gc_finish_order == :after + on_gc_start - cpu_time_at_gc_start = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_start_ns') + cpu_time_at_gc_start = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_start_ns') - # Even though we keep calling sample, the result only includes the time until we called on_gc_start - 5.times { another_way_of_calling_sample } + # Even though we keep calling sample, the result only includes the time until we called on_gc_start + 5.times { another_way_of_calling_sample } - total_cpu_for_rspec_thread = - samples_for_thread(samples, Thread.current) - .select { |it| it.locations.find { |frame| frame.base_label == 'another_way_of_calling_sample' } } - .map { |it| it.values.fetch(:'cpu-time') } - .reduce(:+) + total_cpu_for_rspec_thread = + samples_for_thread(samples, Thread.current) + .select { |it| it.locations.find { |frame| frame.base_label == 'another_way_of_calling_sample' } } + .map { |it| it.values.fetch(:'cpu-time') } + .reduce(:+) - expect(total_cpu_for_rspec_thread).to be(cpu_time_at_gc_start - cpu_time_at_first_sample) - end + expect(total_cpu_for_rspec_thread).to be(cpu_time_at_gc_start - cpu_time_at_first_sample) + end - # The whole point of cpu_time_at_previous_sample_ns is to track the past point in time that we use as start of - # the time range for a sample. - # BUT, we can't let it advance during GC as it should only get accounted for when GC finishes. - it 'does not advance cpu_time_at_previous_sample_ns for the thread beyond gc_tracking.cpu_time_at_start_ns' do - sample + # When a thread is marked as being in GC the cpu_time_at_previous_sample_ns is not allowed to advance until + # the GC finishes. 
+ it 'does not advance cpu_time_at_previous_sample_ns for the thread beyond gc_tracking.cpu_time_at_start_ns' do + sample + + on_gc_start - on_gc_start - on_gc_finish if on_gc_finish_order == :after + cpu_time_at_gc_start = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_start_ns') - cpu_time_at_gc_start = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_start_ns') + 5.times { sample } - 5.times { sample } + cpu_time_at_previous_sample_ns = + per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) - cpu_time_at_previous_sample_ns = - per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) + expect(cpu_time_at_previous_sample_ns).to be cpu_time_at_gc_start + end + end - expect(cpu_time_at_previous_sample_ns).to be cpu_time_at_gc_start - end + context 'when a thread is unmarked as being in garbage collection by on_gc_finish' do + it 'lets cpu_time_at_previous_sample_ns advance again' do + sample + + on_gc_start + + cpu_time_at_gc_start = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_start_ns') + + on_gc_finish + + 5.times { sample } + + cpu_time_at_previous_sample_ns = + per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) + + expect(cpu_time_at_previous_sample_ns).to be > cpu_time_at_gc_start end end end @@ -653,26 +646,11 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current it "records the cpu-time when garbage collection started in the caller thread's context" do on_gc_start - expect(per_thread_context.fetch(Thread.current)).to include(:'gc_tracking.cpu_time_at_start_ns' => be > 0) - end - end - end - - context 'when called again after on_gc_finish but before sample_after_gc' do - before do - on_gc_start - on_gc_finish - end + cpu_time_at_previous_sample_ns = per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) - it 'does not change the gc start times' do - 
start_times = proc do - cpu_time = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_start_ns') - wall_time = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.wall_time_at_start_ns') - - [cpu_time, wall_time] + expect(per_thread_context.fetch(Thread.current)) + .to include(:'gc_tracking.cpu_time_at_start_ns' => (be > cpu_time_at_previous_sample_ns)) end - - expect { on_gc_start }.to_not change(&start_times) end end end @@ -693,38 +671,54 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current context 'when on_gc_start was not called before' do # See comment in the actual implementation on when/why this can happen - it 'does not change the gc finish times' do + it 'does not change the wall_time_at_previous_gc_ns' do on_gc_finish - expect(per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_finish_ns')).to be invalid_time - expect(per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.wall_time_at_finish_ns')).to be invalid_time + expect(gc_tracking.fetch(:wall_time_at_previous_gc_ns)).to be invalid_time end end context 'when on_gc_start was previously called' do before { on_gc_start } - it "records the wall-time when garbage collection finished in the caller thread's context" do + it 'records the wall-time when garbage collection finished in the gc_tracking' do wall_time_before_on_gc_finish_ns = Datadog::Core::Utils::Time.get_time(:nanosecond) on_gc_finish wall_time_after_on_gc_finish_ns = Datadog::Core::Utils::Time.get_time(:nanosecond) + expect(gc_tracking.fetch(:wall_time_at_previous_gc_ns)) + .to be_between(wall_time_before_on_gc_finish_ns, wall_time_after_on_gc_finish_ns) + end + + it 'resets the gc tracking fields back to invalid_time' do + on_gc_finish + expect(per_thread_context.fetch(Thread.current)).to include( - :'gc_tracking.wall_time_at_finish_ns' => - be_between(wall_time_before_on_gc_finish_ns, wall_time_after_on_gc_finish_ns) + :'gc_tracking.cpu_time_at_start_ns' 
=> invalid_time, + :'gc_tracking.wall_time_at_start_ns' => invalid_time, ) end + it 'records the wall-time time spent between calls to on_gc_start and on_gc_finish' do + wall_time_at_start_ns = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.wall_time_at_start_ns') + + wall_time_before_on_gc_finish_ns = Datadog::Core::Utils::Time.get_time(:nanosecond) + on_gc_finish + + expect(gc_tracking.fetch(:accumulated_wall_time_ns)) + .to be >= (wall_time_before_on_gc_finish_ns - wall_time_at_start_ns) + end + context 'cpu-time behavior' do context 'when not on Linux' do before do skip 'The fallback behavior only applies when not on Linux' if PlatformHelpers.linux? end - it "records the cpu-time when garbage collection finished in the caller thread's context as zero" do + it 'records the accumulated_cpu_time_ns as zero' do on_gc_finish - expect(per_thread_context.fetch(Thread.current)).to include(:'gc_tracking.cpu_time_at_finish_ns' => 0) + expect(gc_tracking.fetch(:accumulated_cpu_time_ns)).to be 0 end end @@ -733,187 +727,152 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current skip 'Test only runs on Linux' unless PlatformHelpers.linux? 
end - it "records the cpu-time when garbage collection finished in the caller thread's context" do + it 'records the cpu-time spent between calls to on_gc_start and on_gc_finish' do on_gc_finish - expect(per_thread_context.fetch(Thread.current)).to include(:'gc_tracking.cpu_time_at_finish_ns' => be > 0) + expect(gc_tracking.fetch(:accumulated_cpu_time_ns)).to be > 0 + end + + it 'advances the cpu_time_at_previous_sample_ns for the sampled thread by the time spent in garbage collection' do + cpu_time_at_previous_sample_ns_before = + per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) + + on_gc_finish + + expect(per_thread_context.fetch(Thread.current)).to include( + cpu_time_at_previous_sample_ns: be > cpu_time_at_previous_sample_ns_before + ) end end end end context 'when going through multiple cycles of on_gc_start/on_gc_finish without sample_after_gc getting called' do - it 'keeps the cpu-time and wall-time at finish from the LAST on_gc_finish' do - context_tracking = [] + let(:context_tracking) { [] } + before do 5.times do on_gc_start on_gc_finish - context_tracking << per_thread_context.fetch(Thread.current) + context_tracking << gc_tracking end - - cpu_time_from_last_on_gc_finish = context_tracking.last.fetch(:'gc_tracking.cpu_time_at_finish_ns') - wall_time_from_last_on_gc_finish = context_tracking.last.fetch(:'gc_tracking.wall_time_at_finish_ns') - - expect(context_tracking.first) - .to include(:'gc_tracking.wall_time_at_finish_ns' => be < wall_time_from_last_on_gc_finish) - - # This always advances: all_but_last <= the last one - # (Needs the <= because unfortunately we may not get enough precision, otherwise it would be <) - all_but_last = context_tracking[0..-2] - expect( - all_but_last.map { |it| it.fetch(:'gc_tracking.cpu_time_at_finish_ns') } - ).to all be <= cpu_time_from_last_on_gc_finish - expect( - all_but_last.map { |it| it.fetch(:'gc_tracking.wall_time_at_finish_ns') } - ).to all be <= wall_time_from_last_on_gc_finish 
end - end - end - end - describe '#sample_after_gc' do - let(:gc_samples) { samples.select { |it| it.locations.first.path == 'Garbage Collection' } } + it 'accumulates the cpu-time and wall-time from the multiple GCs' do + all_accumulated_wall_time = context_tracking.map { |it| it.fetch(:accumulated_wall_time_ns) } - before { sample } + expect(all_accumulated_wall_time).to eq all_accumulated_wall_time.sort + expect(all_accumulated_wall_time.first).to be <= all_accumulated_wall_time.last - context 'when there is no thread with gc time to be sampled' do - it 'does not sample any threads' do - sample_after_gc + all_accumulated_cpu_time = context_tracking.map { |it| it.fetch(:accumulated_cpu_time_ns) } + expect(all_accumulated_cpu_time).to eq all_accumulated_cpu_time.sort - expect(gc_samples).to be_empty - end + expect(all_accumulated_cpu_time.first).to be < all_accumulated_cpu_time.last if all_accumulated_cpu_time.first > 0 + end - it 'does not increment the gc_samples stat' do - sample_after_gc + it 'updates the wall_time_at_previous_gc_ns with the latest one' do + all_wall_time_at_previous_gc_ns = context_tracking.map { |it| it.fetch(:wall_time_at_previous_gc_ns) } - expect(stats.fetch(:gc_samples)).to be 0 + expect(all_wall_time_at_previous_gc_ns.last).to be all_wall_time_at_previous_gc_ns.max + end end end + end - context 'when there is a thread with gc start time but no finish time' do - before { on_gc_start } - - it 'does not sample any threads' do - sample_after_gc - - expect(gc_samples).to be_empty - end - - it 'does not increment the gc_samples stat' do - sample_after_gc + describe '#sample_after_gc' do + before { sample } - expect(stats.fetch(:gc_samples)).to be 0 + context 'when called before on_gc_start/on_gc_finish' do + it do + expect { sample_after_gc }.to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) end end - context 'when there is a thread with a recorded gc start and finish time' do - let(:gc_sample) do - expect(gc_samples.size).to 
be 1 - gc_samples.first - end + context 'when there is gc information to record' do + let(:gc_sample) { samples.find { |it| it.labels.fetch(:'thread name') == 'Garbage Collection' } } before do on_gc_start + @time_before = Datadog::Core::Utils::Time.as_utc_epoch_ns(Time.now) on_gc_finish + @time_after = Datadog::Core::Utils::Time.as_utc_epoch_ns(Time.now) end - it 'samples the thread with recorded gc start and finish time, marking it as being in Garbage Collection' do - sample_after_gc + context 'when called more than once in a row' do + it do + sample_after_gc + + expect { sample_after_gc }.to raise_error(RuntimeError, /Unexpected call to sample_after_gc/) + end + end - expect(object_id_from(gc_sample.labels.fetch(:'thread id'))).to eq Thread.current.object_id + it 'increments the gc_samples counter' do + expect { sample_after_gc }.to change { stats.fetch(:gc_samples) }.from(0).to(1) end - it 'samples the thread with recorded gc start and finish time, recording the times between gc start and finish' do - cpu_time_at_start_ns = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_start_ns') - cpu_time_at_finish_ns = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.cpu_time_at_finish_ns') - wall_time_at_start_ns = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.wall_time_at_start_ns') - wall_time_at_finish_ns = per_thread_context.fetch(Thread.current).fetch(:'gc_tracking.wall_time_at_finish_ns') + it 'sets the wall_time_at_last_flushed_gc_event_ns from the wall_time_at_previous_gc_ns' do + wall_time_at_previous_gc_ns = gc_tracking.fetch(:wall_time_at_previous_gc_ns) sample_after_gc - expect(gc_sample.values).to include( - :"cpu-samples" => 1, - :'cpu-time' => cpu_time_at_finish_ns - cpu_time_at_start_ns, - :"wall-time" => wall_time_at_finish_ns - wall_time_at_start_ns, - ) + expect(gc_tracking.fetch(:wall_time_at_last_flushed_gc_event_ns)).to be wall_time_at_previous_gc_ns end - it 'resets the gc tracking fields back to 
invalid_time' do + it 'resets the wall_time_at_previous_gc_ns to invalid_time' do sample_after_gc - expect(per_thread_context.fetch(Thread.current)).to include( - :'gc_tracking.cpu_time_at_start_ns' => invalid_time, - :'gc_tracking.cpu_time_at_finish_ns' => invalid_time, - :'gc_tracking.wall_time_at_start_ns' => invalid_time, - :'gc_tracking.wall_time_at_finish_ns' => invalid_time, - ) - end - - it 'increments the gc_samples stat' do - expect { sample_after_gc }.to change { stats.fetch(:gc_samples) }.from(0).to(1) + expect(gc_tracking.fetch(:wall_time_at_previous_gc_ns)).to be invalid_time end - it 'does not sample any other threads' do + it 'creates a Garbage Collection sample' do sample_after_gc - expect(gc_samples.size).to be 1 + expect(gc_sample.values.fetch(:'cpu-samples')).to be 1 + expect(gc_sample.labels).to match a_hash_including( + :state => 'had cpu', + :'thread id' => 'GC', + :'thread name' => 'Garbage Collection', + :event => 'gc', + :'gc cause' => an_instance_of(String), + :'gc type' => an_instance_of(String), + ) + expect(gc_sample.locations.first.path).to eq 'Garbage Collection' end - it 'advances the wall_time_at_previous_sample_ns for the sampled thread by the time spent in garbage collection' do - wall_time_at_previous_sample_ns_before = - per_thread_context.fetch(Thread.current).fetch(:wall_time_at_previous_sample_ns) + it 'creates a Garbage Collection sample using the accumulated_cpu_time_ns and accumulated_wall_time_ns' do + accumulated_cpu_time_ns = gc_tracking.fetch(:accumulated_cpu_time_ns) + accumulated_wall_time_ns = gc_tracking.fetch(:accumulated_wall_time_ns) sample_after_gc - wall_time_spent_in_gc = gc_sample.values.fetch(:'wall-time') - - expect(per_thread_context.fetch(Thread.current)).to include( - wall_time_at_previous_sample_ns: wall_time_at_previous_sample_ns_before + wall_time_spent_in_gc + expect(gc_sample.values).to match a_hash_including( + :'cpu-time' => accumulated_cpu_time_ns, + :'wall-time' => accumulated_wall_time_ns, ) 
end - context 'cpu-time behavior' do - context 'when not on Linux' do - before do - skip 'The fallback behavior only applies when not on Linux' if PlatformHelpers.linux? - end - - it 'keeps the cpu_time_at_previous_sample_ns as invalid_time' do - sample_after_gc - - expect(per_thread_context.fetch(Thread.current)).to include(cpu_time_at_previous_sample_ns: invalid_time) - end - end + it 'does not include the timeline timestamp' do + sample_after_gc - context 'on Linux' do - before do - skip 'Test only runs on Linux' unless PlatformHelpers.linux? - end + expect(gc_sample.labels.keys).to_not include(:end_timestamp_ns) + end - it 'advances the cpu_time_at_previous_sample_ns for the sampled thread by the time spent in garbage collection' do - cpu_time_at_previous_sample_ns_before = - per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) + context 'when timeline is enabled' do + let(:timeline_enabled) { true } - sample_after_gc + it 'creates a Garbage Collection sample using the accumulated_wall_time_ns as the timeline duration' do + accumulated_wall_time_ns = gc_tracking.fetch(:accumulated_wall_time_ns) - cpu_time_spent_in_gc = gc_sample.values.fetch(:'cpu-time') + sample_after_gc - expect(per_thread_context.fetch(Thread.current)).to include( - cpu_time_at_previous_sample_ns: cpu_time_at_previous_sample_ns_before + cpu_time_spent_in_gc - ) - end + expect(gc_sample.values.fetch(:timeline)).to be accumulated_wall_time_ns end - end - - context 'when timeline is enabled' do - let(:timeline_enabled) { true } - it 'does not include end_timestamp_ns labels in GC samples' do + it 'creates a Garbage Collection sample using the timestamp set by on_gc_finish, converted to epoch ns' do sample_after_gc - expect(gc_samples.first.labels.keys).to_not include(:end_timestamp_ns) + expect(gc_sample.labels.fetch(:end_timestamp_ns)).to be_between(@time_before, @time_after) end end end From a2c8913654d9b0fb9b15586d1ec86f145bc1824e Mon Sep 17 00:00:00 2001 From: Ivo 
Anjo Date: Fri, 8 Dec 2023 14:01:14 +0000 Subject: [PATCH 10/21] Remove `SAMPLE_IN_GC` enum entry Instead, we expose the `record_placeholder_stack` directly. --- .../collectors_stack.c | 47 ++++++++----------- .../collectors_stack.h | 9 +++- .../collectors_thread_context.c | 13 +++-- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index 21cb2d26c4b..0e412e68aeb 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -37,13 +37,6 @@ static VALUE _native_sample( ); static void maybe_add_placeholder_frames_omitted(VALUE thread, sampling_buffer* buffer, char *frames_omitted_message, int frames_omitted_message_size); static void record_placeholder_stack_in_native_code(sampling_buffer* buffer, VALUE recorder_instance, sample_values values, sample_labels labels); -static void record_placeholder_stack( - sampling_buffer* buffer, - VALUE recorder_instance, - sample_values values, - sample_labels labels, - ddog_prof_Function placeholder_stack -); static void sample_thread_internal( VALUE thread, sampling_buffer* buffer, @@ -121,14 +114,24 @@ static VALUE _native_sample( ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count}; - sample_thread( - thread, - buffer, - recorder_instance, - values, - (sample_labels) {.labels = slice_labels, .state_label = state_label}, - RTEST(in_gc) ? 
SAMPLE_IN_GC : SAMPLE_REGULAR - ); + if (in_gc == Qtrue) { + record_placeholder_stack( + buffer, + recorder_instance, + values, + (sample_labels) {.labels = slice_labels, .state_label = state_label}, + (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("Garbage Collection")} + ); + } else { + sample_thread( + thread, + buffer, + recorder_instance, + values, + (sample_labels) {.labels = slice_labels, .state_label = state_label}, + SAMPLE_REGULAR + ); + } sampling_buffer_free(buffer); @@ -149,18 +152,6 @@ void sample_thread( return; } - // Skips sampling the stack -- records only a "Garbage Collection" frame and the values/labels - if (type == SAMPLE_IN_GC) { - record_placeholder_stack( - buffer, - recorder_instance, - values, - labels, - (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("Garbage Collection")} - ); - return; - } - rb_raise(rb_eArgError, "Unexpected value for sample_type: %d", type); } @@ -362,7 +353,7 @@ static void record_placeholder_stack_in_native_code( ); } -static void record_placeholder_stack( +void record_placeholder_stack( sampling_buffer* buffer, VALUE recorder_instance, sample_values values, diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.h b/ext/ddtrace_profiling_native_extension/collectors_stack.h index e4175ae35e8..ea4c498a306 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.h +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.h @@ -6,7 +6,7 @@ typedef struct sampling_buffer sampling_buffer; -typedef enum { SAMPLE_REGULAR, SAMPLE_IN_GC } sample_type; +typedef enum { SAMPLE_REGULAR } sample_type; void sample_thread( VALUE thread, @@ -16,5 +16,12 @@ void sample_thread( sample_labels labels, sample_type type ); +void record_placeholder_stack( + sampling_buffer* buffer, + VALUE recorder_instance, + sample_values values, + sample_labels labels, + ddog_prof_Function placeholder_stack +); sampling_buffer *sampling_buffer_new(unsigned 
int max_frames); void sampling_buffer_free(sampling_buffer *buffer); diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 68b6a0683f3..3c2a7b18cf0 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -48,8 +48,8 @@ // into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns` // of the thread that did GC also gets adjusted to avoid double-accounting.) // -// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded, using the special -// `SAMPLE_IN_GC` sample type, which produces a stack with a placeholder `Garbage Collection` frame. This sample gets +// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having +// a single placeholder `Garbage Collection` frame. This sample gets // assigned the cpu-time and wall-time that was recorded between calls to `on_gc_start` and `on_gc_finish`, as well // as metadata for the last GC. 
// @@ -639,10 +639,9 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns); } - sample_thread( - /* thread: */ Qnil, - /* buffer: */ state->sampling_buffer, - /* recorder_instance: */ state->recorder_instance, + record_placeholder_stack( + state->sampling_buffer, + state->recorder_instance, (sample_values) { .cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns, .cpu_or_wall_samples = 1, @@ -650,7 +649,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { .timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns, }, (sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns}, - SAMPLE_IN_GC + (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("Garbage Collection")} ); state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns; From ab39dce5de6b7a841fef0578043d93d3c0ac25e9 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 8 Dec 2023 14:08:30 +0000 Subject: [PATCH 11/21] Remove `sample_type` enum No longer needed! 
--- .../collectors_stack.c | 22 ++----------------- .../collectors_stack.h | 5 +---- .../collectors_thread_context.c | 7 +----- 3 files changed, 4 insertions(+), 30 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index 0e412e68aeb..93076a6bc1a 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -128,8 +128,7 @@ static VALUE _native_sample( buffer, recorder_instance, values, - (sample_labels) {.labels = slice_labels, .state_label = state_label}, - SAMPLE_REGULAR + (sample_labels) {.labels = slice_labels, .state_label = state_label} ); } @@ -138,23 +137,6 @@ static VALUE _native_sample( return Qtrue; } -void sample_thread( - VALUE thread, - sampling_buffer* buffer, - VALUE recorder_instance, - sample_values values, - sample_labels labels, - sample_type type -) { - // Samples thread into recorder - if (type == SAMPLE_REGULAR) { - sample_thread_internal(thread, buffer, recorder_instance, values, labels); - return; - } - - rb_raise(rb_eArgError, "Unexpected value for sample_type: %d", type); -} - #define CHARSLICE_EQUALS(must_be_a_literal, charslice) (strlen("" must_be_a_literal) == charslice.len && strncmp(must_be_a_literal, charslice.ptr, charslice.len) == 0) // Idea: Should we release the global vm lock (GVL) after we get the data from `rb_profile_frames`? That way other Ruby threads @@ -166,7 +148,7 @@ void sample_thread( // * Should we move this into a different thread entirely? // * If we don't move it into a different thread, does releasing the GVL on a Ruby thread mean that we're introducing // a new thread switch point where there previously was none? 
-static void sample_thread_internal( +void sample_thread( VALUE thread, sampling_buffer* buffer, VALUE recorder_instance, diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.h b/ext/ddtrace_profiling_native_extension/collectors_stack.h index ea4c498a306..3c9ae15db91 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.h +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.h @@ -6,15 +6,12 @@ typedef struct sampling_buffer sampling_buffer; -typedef enum { SAMPLE_REGULAR } sample_type; - void sample_thread( VALUE thread, sampling_buffer* buffer, VALUE recorder_instance, sample_values values, - sample_labels labels, - sample_type type + sample_labels labels ); void record_placeholder_stack( sampling_buffer* buffer, diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 3c2a7b18cf0..5659221d5e3 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -190,7 +190,6 @@ static void trigger_sample_for_thread( VALUE stack_from_thread, struct per_thread_context *thread_context, sample_values values, - sample_type type, long current_monotonic_wall_time_ns, ddog_CharSlice *ruby_vm_type, ddog_CharSlice *class_name @@ -496,7 +495,6 @@ void update_metrics_and_sample( stack_from_thread, thread_context, (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns}, - SAMPLE_REGULAR, current_monotonic_wall_time_ns, NULL, NULL @@ -667,7 +665,6 @@ static void trigger_sample_for_thread( VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack struct per_thread_context *thread_context, sample_values values, - sample_type type, long current_monotonic_wall_time_ns, // These two labels are only used for allocation profiling; @ivoanjo: may want to 
refactor this at some point? ddog_CharSlice *ruby_vm_type, @@ -790,8 +787,7 @@ static void trigger_sample_for_thread( state->sampling_buffer, state->recorder_instance, values, - (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns}, - type + (sample_labels) {.labels = slice_labels, .state_label = state_label, .end_timestamp_ns = end_timestamp_ns} ); } @@ -1249,7 +1245,6 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in /* stack_from_thread: */ current_thread, get_or_create_context_for(current_thread, state), (sample_values) {.alloc_samples = sample_weight}, - SAMPLE_REGULAR, INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions &ruby_vm_type, optional_class_name From f3a5c979bf1bbaea8ff919eccb31022122638d57 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 8 Dec 2023 14:10:57 +0000 Subject: [PATCH 12/21] Fix rubocop nitpick --- spec/datadog/profiling/collectors/thread_context_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index 105af8c2900..6ebfd62cea1 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -733,7 +733,7 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current expect(gc_tracking.fetch(:accumulated_cpu_time_ns)).to be > 0 end - it 'advances the cpu_time_at_previous_sample_ns for the sampled thread by the time spent in garbage collection' do + it 'advances the cpu_time_at_previous_sample_ns for the sampled thread by the time spent in GC' do cpu_time_at_previous_sample_ns_before = per_thread_context.fetch(Thread.current).fetch(:cpu_time_at_previous_sample_ns) From b78d772aeea17d5a5074e42a25ca2d2fb51395f7 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: 
Fri, 8 Dec 2023 14:35:07 +0000 Subject: [PATCH 13/21] Tweak code to avoid triggering warnings on the buggy gcc that our tracer-2.7 docker image ships This allows us to keep using `-Werror` in CI without issue, which is pretty valuable. --- .../collectors_gc_profiling_helper.c | 4 ++++ ext/ddtrace_profiling_native_extension/collectors_stack.c | 6 ++++-- .../collectors_thread_context.c | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c b/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c index a0b7cdbe376..e9dd6c727d9 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c +++ b/ext/ddtrace_profiling_native_extension/collectors_gc_profiling_helper.c @@ -79,21 +79,25 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { labels[label_pos++] = (ddog_prof_Label) { .key = DDOG_CHARSLICE_C("thread id"), .str = DDOG_CHARSLICE_C("GC"), + .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this }; labels[label_pos++] = (ddog_prof_Label) { .key = DDOG_CHARSLICE_C("thread name"), .str = DDOG_CHARSLICE_C("Garbage Collection"), + .num = 0, // Workaround, same as above }; labels[label_pos++] = (ddog_prof_Label) { .key = DDOG_CHARSLICE_C("state"), .str = DDOG_CHARSLICE_C("had cpu"), + .num = 0, // Workaround, same as above }; labels[label_pos++] = (ddog_prof_Label) { .key = DDOG_CHARSLICE_C("event"), .str = DDOG_CHARSLICE_C("gc"), + .num = 0, // Workaround, same as above }; VALUE major_by = rb_gc_latest_gc_info(major_by_sym); diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index 93076a6bc1a..404a4cabcb6 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -115,12 +115,13 @@ static VALUE 
_native_sample( ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count}; if (in_gc == Qtrue) { + ddog_CharSlice gc_placeholder = DDOG_CHARSLICE_C("Garbage Collection"); record_placeholder_stack( buffer, recorder_instance, values, (sample_labels) {.labels = slice_labels, .state_label = state_label}, - (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("Garbage Collection")} + (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = gc_placeholder} ); } else { sample_thread( @@ -326,12 +327,13 @@ static void record_placeholder_stack_in_native_code( sample_values values, sample_labels labels ) { + ddog_CharSlice in_native_code_placeholder = DDOG_CHARSLICE_C("In native code"); record_placeholder_stack( buffer, recorder_instance, values, labels, - (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("In native code")} + (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = in_native_code_placeholder} ); } diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 5659221d5e3..34862ec3fb8 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -637,6 +637,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns); } + ddog_CharSlice gc_placeholder = DDOG_CHARSLICE_C("Garbage Collection"); record_placeholder_stack( state->sampling_buffer, state->recorder_instance, @@ -647,7 +648,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { .timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns, }, (sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns}, - (ddog_prof_Function) 
{.name = DDOG_CHARSLICE_C(""), .filename = DDOG_CHARSLICE_C("Garbage Collection")} + (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = gc_placeholder} ); state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns; From d97f4dc528796e69d5c61d9ab9c9be54c0fd4bdb Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 8 Dec 2023 14:50:27 +0000 Subject: [PATCH 14/21] Tweak spec to avoid flakiness Because GC samples can be coalesced, we can't assert on 5 GCs resulting in exactly 5 GC events. --- .../profiling/collectors/cpu_and_wall_time_worker_spec.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 9f0a993924d..e01da3f1831 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -256,11 +256,10 @@ described_class::Testing._native_trigger_sample - invoke_gc_times = 5 - - invoke_gc_times.times do + 5.times do Thread.pass GC.start + Thread.pass end cpu_and_wall_time_worker.stop @@ -269,7 +268,6 @@ gc_sample = all_samples.find { |sample| sample.labels[:'gc cause'] == 'GC.start()' } - expect(gc_sample.values.fetch(:'cpu-samples')).to be >= invoke_gc_times expect(gc_sample.labels).to match a_hash_including( :state => 'had cpu', :'thread id' => 'GC', From 0127e3a84fa068a33a0b78887229ded0e9652dba Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 8 Dec 2023 15:26:15 +0000 Subject: [PATCH 15/21] Update outdated comment --- .../collectors_cpu_and_wall_time_worker.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 0792e699dff..876dbd4afee 100644 --- 
a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -720,8 +720,8 @@ static void on_gc_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) { } else if (event == RUBY_INTERNAL_EVENT_GC_EXIT) { bool should_flush = thread_context_collector_on_gc_finish(state->thread_context_collector_instance); - // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc after it - // fully finishes the garbage collection, so that one is allowed to do allocations and throw exceptions as usual. + // We use rb_postponed_job_register_one to ask Ruby to run thread_context_collector_sample_after_gc when the + // thread collector flags it's time to flush. if (should_flush) rb_postponed_job_register_one(0, after_gc_from_postponed_job, NULL); } } From e8d7b4e5fa16b2f11ee5bbd6758566884fc418fb Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Dec 2023 12:58:01 +0000 Subject: [PATCH 16/21] Reorder steps in on_gc_finish as suggested during PR review --- .../collectors_thread_context.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 34862ec3fb8..c158cea5e6c 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -555,28 +555,30 @@ bool thread_context_collector_on_gc_finish(VALUE self_instance) { // how often this happens -- see on_gc_start. 
if (thread_context == NULL) return false; - if (thread_context->gc_tracking.cpu_time_at_start_ns == INVALID_TIME && - thread_context->gc_tracking.wall_time_at_start_ns == INVALID_TIME) { + long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns; + long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns; + + if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) { // If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread // context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just // do nothing. return false; } - // Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time - long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context); - long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); - - long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - thread_context->gc_tracking.cpu_time_at_start_ns; - long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns; - // Mark thread as no longer in GC thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME; thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME; + // Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time + long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context); + long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); + // If our end timestamp is not OK, we bail out if (wall_time_at_finish_ns == 0) return false; + long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns; + long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns; + // Wall-time can go backwards if the system clock gets changed (and we observed spurious 
jumps back on macOS as well) // so let's ensure we don't get negative values for time deltas. gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0); From c6b791d3b34f871af9b1fc857e30a8ee0515bf39 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Dec 2023 13:03:58 +0000 Subject: [PATCH 17/21] Add explanation for why we add both cpu/wall-time duration as well as timeline duration --- .../collectors_thread_context.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index c158cea5e6c..7360982d47b 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -644,6 +644,11 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { state->sampling_buffer, state->recorder_instance, (sample_values) { + // This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a + // timeline duration. 
+ // This is done to enable two use-cases: + // * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph + // * the timeline duration is used when the event shows up in the timeline .cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns, .cpu_or_wall_samples = 1, .wall_time_ns = state->gc_tracking.accumulated_wall_time_ns, From 2265fc35092f2e1dfcfab6ce0e2c25e7c4490f36 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Dec 2023 13:11:41 +0000 Subject: [PATCH 18/21] Avoid duplicate boilerplate when calling `record_placeholder_stack` --- .../collectors_stack.c | 11 +++++------ .../collectors_stack.h | 2 +- .../collectors_thread_context.c | 3 +-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.c b/ext/ddtrace_profiling_native_extension/collectors_stack.c index 404a4cabcb6..9bc0d11e0cb 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.c +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.c @@ -115,13 +115,12 @@ static VALUE _native_sample( ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = labels_count}; if (in_gc == Qtrue) { - ddog_CharSlice gc_placeholder = DDOG_CHARSLICE_C("Garbage Collection"); record_placeholder_stack( buffer, recorder_instance, values, (sample_labels) {.labels = slice_labels, .state_label = state_label}, - (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = gc_placeholder} + DDOG_CHARSLICE_C("Garbage Collection") ); } else { sample_thread( @@ -327,13 +326,12 @@ static void record_placeholder_stack_in_native_code( sample_values values, sample_labels labels ) { - ddog_CharSlice in_native_code_placeholder = DDOG_CHARSLICE_C("In native code"); record_placeholder_stack( buffer, recorder_instance, values, labels, - (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = in_native_code_placeholder} + DDOG_CHARSLICE_C("In native code") ); } @@ -342,9 +340,10 @@ void record_placeholder_stack( 
VALUE recorder_instance, sample_values values, sample_labels labels, - ddog_prof_Function placeholder_stack + ddog_CharSlice placeholder_stack ) { - buffer->locations[0] = (ddog_prof_Location) {.function = placeholder_stack, .line = 0}; + ddog_prof_Function placeholder = {.name = DDOG_CHARSLICE_C(""), .filename = placeholder_stack}; + buffer->locations[0] = (ddog_prof_Location) {.function = placeholder, .line = 0}; record_sample( recorder_instance, diff --git a/ext/ddtrace_profiling_native_extension/collectors_stack.h b/ext/ddtrace_profiling_native_extension/collectors_stack.h index 3c9ae15db91..b026bbbdeb8 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_stack.h +++ b/ext/ddtrace_profiling_native_extension/collectors_stack.h @@ -18,7 +18,7 @@ void record_placeholder_stack( VALUE recorder_instance, sample_values values, sample_labels labels, - ddog_prof_Function placeholder_stack + ddog_CharSlice placeholder_stack ); sampling_buffer *sampling_buffer_new(unsigned int max_frames); void sampling_buffer_free(sampling_buffer *buffer); diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 7360982d47b..a86ce1dba9d 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -639,7 +639,6 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns); } - ddog_CharSlice gc_placeholder = DDOG_CHARSLICE_C("Garbage Collection"); record_placeholder_stack( state->sampling_buffer, state->recorder_instance, @@ -655,7 +654,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { .timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns, }, (sample_labels) {.labels = slice_labels, .state_label = NULL, 
.end_timestamp_ns = end_timestamp_ns}, - (ddog_prof_Function) {.name = DDOG_CHARSLICE_C(""), .filename = gc_placeholder} + DDOG_CHARSLICE_C("Garbage Collection") ); state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns; From bfdd1b73e95214e3a670eda0d444f8a5ac1d17cc Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 13 Dec 2023 14:18:12 +0000 Subject: [PATCH 19/21] Minor fix to spec after branch rebase on top of master --- spec/datadog/profiling/stack_recorder_spec.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 53110b574da..73f73ae4874 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -342,7 +342,9 @@ def sample_types_from(decoded_profile) describe 'heap samples' do let(:sample_rate) { 50 } - let(:metric_values) { { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => sample_rate } } + let(:metric_values) do + { 'cpu-time' => 101, 'cpu-samples' => 1, 'wall-time' => 789, 'alloc-samples' => sample_rate, 'timeline' => 42 } + end let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a } let(:a_string) { 'a beautiful string' } From 5f421079fba7ea47e0cf9562a52fc2ebac96c3df Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 13 Dec 2023 15:12:21 +0000 Subject: [PATCH 20/21] Fix spec flakyness It seems that even with `GC.start()` we can observe different reasons, so I've relaxed the spec. 
--- .../profiling/collectors/cpu_and_wall_time_worker_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index e01da3f1831..11a2bdee3fb 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -273,7 +273,7 @@ :'thread id' => 'GC', :'thread name' => 'Garbage Collection', :event => 'gc', - :'gc reason' => 'requested (FORCE)', + :'gc reason' => an_instance_of(String), :'gc cause' => 'GC.start()', :'gc type' => 'major', ) From a3bdd74bd2f02bbd96e4c696b66a11201a26f122 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 13 Dec 2023 15:28:38 +0000 Subject: [PATCH 21/21] Fix another flaky spec Because GC samples are pushed with `thread id: GC`, this was breaking `object_id_from` in tests that happened to have some GC samples. There is no `object_id` for the "GC thread" because there is no such thread -- it's just a way of representing GC information. --- spec/datadog/profiling/spec_helper.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spec/datadog/profiling/spec_helper.rb b/spec/datadog/profiling/spec_helper.rb index fe2de526335..fcb084106b9 100644 --- a/spec/datadog/profiling/spec_helper.rb +++ b/spec/datadog/profiling/spec_helper.rb @@ -65,7 +65,11 @@ def decode_frame_from_pprof(decoded_profile, location_id) end def object_id_from(thread_id) - Integer(thread_id.match(/\d+ \((?\d+)\)/)[:object_id]) + if thread_id != 'GC' + Integer(thread_id.match(/\d+ \((?\d+)\)/)[:object_id]) + else + -1 + end end def samples_for_thread(samples, thread)