diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 4f5bee976ba..bdd9568f278 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -178,7 +178,8 @@ static void trigger_sample_for_thread( struct per_thread_context *thread_context, sample_values values, sample_type type, - long current_monotonic_wall_time_ns + long current_monotonic_wall_time_ns, + ddog_CharSlice *ruby_vm_type ); static VALUE _native_thread_list(VALUE self); static struct per_thread_context *get_or_create_context_for(VALUE thread, struct thread_context_collector_state *state); @@ -199,7 +200,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state, static bool is_type_web(VALUE root_span_type); static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance); static VALUE thread_list(struct thread_context_collector_state *state); -static VALUE _native_sample_allocation(VALUE self, VALUE collector_instance, VALUE sample_weight); +static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object); static VALUE _native_new_empty_thread(VALUE self); void collectors_thread_context_init(VALUE profiling_module) { @@ -222,7 +223,7 @@ void collectors_thread_context_init(VALUE profiling_module) { rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1); rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1); rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2); - rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 2); + rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3); rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1); rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1); rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1); @@ -463,7 +464,8 @@ void update_metrics_and_sample( thread_context, (sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = wall_time_elapsed_ns}, SAMPLE_REGULAR, - current_monotonic_wall_time_ns + current_monotonic_wall_time_ns, + NULL ); } @@ -606,7 +608,8 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { thread_context, (sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns}, SAMPLE_IN_GC, - INVALID_TIME // For now we're not collecting timestamps for these events + INVALID_TIME, // For now we're not collecting timestamps for these events + NULL ); // Mark thread as no longer in GC @@ -637,13 +640,15 @@ static void trigger_sample_for_thread( struct per_thread_context *thread_context, sample_values values, sample_type type, - long current_monotonic_wall_time_ns + long current_monotonic_wall_time_ns, + ddog_CharSlice *ruby_vm_type // Only used for allocation profiling, NULL for others; @ivoanjo: may want to refactor this at some point? ) { int max_label_count = 1 + // thread id 1 + // thread name 1 + // profiler overhead 1 + // end_timestamp_ns + 1 + // ruby vm type 2; // local root span id and span id ddog_prof_Label labels[max_label_count]; int label_pos = 0; @@ -713,6 +718,13 @@ static void trigger_sample_for_thread( }; } + if (ruby_vm_type != NULL) { + labels[label_pos++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("ruby vm type"), + .str = *ruby_vm_type + }; + } + // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I // forgot to update max_label_count" in the future, we've also added this validation. // @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone @@ -1050,12 +1062,15 @@ static VALUE thread_list(struct thread_context_collector_state *state) { return result; } -void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight) { +void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object) { struct thread_context_collector_state *state; TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); VALUE current_thread = rb_thread_current(); + // Tag samples with the VM internal types + ddog_CharSlice ruby_vm_type = ruby_value_type_to_char_slice(rb_type(new_object)); + trigger_sample_for_thread( state, /* thread: */ current_thread, @@ -1063,14 +1078,15 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in get_or_create_context_for(current_thread, state), (sample_values) {.alloc_samples = sample_weight}, SAMPLE_REGULAR, - INVALID_TIME // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions + INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions + &ruby_vm_type ); } // This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec. // It SHOULD NOT be used for other purposes. -static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight) { - thread_context_collector_sample_allocation(collector_instance, NUM2UINT(sample_weight)); +static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object) { + thread_context_collector_sample_allocation(collector_instance, NUM2UINT(sample_weight), new_object); return Qtrue; } diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.h b/ext/ddtrace_profiling_native_extension/collectors_thread_context.h index 573c7ecbfc5..88dfabe9615 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.h +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.h @@ -7,7 +7,7 @@ void thread_context_collector_sample( long current_monotonic_wall_time_ns, VALUE profiler_overhead_stack_thread ); -void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight); +void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object); VALUE thread_context_collector_sample_after_gc(VALUE self_instance); void thread_context_collector_on_gc_start(VALUE self_instance); void thread_context_collector_on_gc_finish(VALUE self_instance); diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb index 44425214fff..86ee026f0a8 100644 --- a/ext/ddtrace_profiling_native_extension/extconf.rb +++ b/ext/ddtrace_profiling_native_extension/extconf.rb @@ -155,6 +155,9 @@ def add_compiler_flag(flag) # On older Rubies, there are no Ractors $defs << '-DNO_RACTORS' if RUBY_VERSION < '3' +# On older Rubies, objects would not move +$defs << '-DNO_T_MOVED' if RUBY_VERSION < '2.7' + # On older Rubies, rb_global_vm_lock_struct did not include the owner field $defs << '-DNO_GVL_OWNER' if RUBY_VERSION < '2.6' diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c new file mode 100644 index 00000000000..43bd02c4a07 --- /dev/null +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.c @@ -0,0 +1,42 @@ +#include "libdatadog_helpers.h" + +#include + +const char *ruby_value_type_to_string(enum ruby_value_type type) { + return ruby_value_type_to_char_slice(type).ptr; +} + +ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) { + switch (type) { + case(RUBY_T_NONE ): return DDOG_CHARSLICE_C("T_NONE"); + case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("T_OBJECT"); + case(RUBY_T_CLASS ): return DDOG_CHARSLICE_C("T_CLASS"); + case(RUBY_T_MODULE ): return DDOG_CHARSLICE_C("T_MODULE"); + case(RUBY_T_FLOAT ): return DDOG_CHARSLICE_C("T_FLOAT"); + case(RUBY_T_STRING ): return DDOG_CHARSLICE_C("T_STRING"); + case(RUBY_T_REGEXP ): return DDOG_CHARSLICE_C("T_REGEXP"); + case(RUBY_T_ARRAY ): return DDOG_CHARSLICE_C("T_ARRAY"); + case(RUBY_T_HASH ): return DDOG_CHARSLICE_C("T_HASH"); + case(RUBY_T_STRUCT ): return DDOG_CHARSLICE_C("T_STRUCT"); + case(RUBY_T_BIGNUM ): return DDOG_CHARSLICE_C("T_BIGNUM"); + case(RUBY_T_FILE ): return DDOG_CHARSLICE_C("T_FILE"); + case(RUBY_T_DATA ): return DDOG_CHARSLICE_C("T_DATA"); + case(RUBY_T_MATCH ): return DDOG_CHARSLICE_C("T_MATCH"); + case(RUBY_T_COMPLEX ): return DDOG_CHARSLICE_C("T_COMPLEX"); + case(RUBY_T_RATIONAL): return DDOG_CHARSLICE_C("T_RATIONAL"); + case(RUBY_T_NIL ): return DDOG_CHARSLICE_C("T_NIL"); + case(RUBY_T_TRUE ): return DDOG_CHARSLICE_C("T_TRUE"); + case(RUBY_T_FALSE ): return DDOG_CHARSLICE_C("T_FALSE"); + case(RUBY_T_SYMBOL ): return DDOG_CHARSLICE_C("T_SYMBOL"); + case(RUBY_T_FIXNUM ): return DDOG_CHARSLICE_C("T_FIXNUM"); + case(RUBY_T_UNDEF ): return DDOG_CHARSLICE_C("T_UNDEF"); + case(RUBY_T_IMEMO ): return DDOG_CHARSLICE_C("T_IMEMO"); + case(RUBY_T_NODE ): return DDOG_CHARSLICE_C("T_NODE"); + case(RUBY_T_ICLASS ): return DDOG_CHARSLICE_C("T_ICLASS"); + case(RUBY_T_ZOMBIE ): return DDOG_CHARSLICE_C("T_ZOMBIE"); + #ifndef NO_T_MOVED + case(RUBY_T_MOVED ): return DDOG_CHARSLICE_C("T_MOVED"); + #endif + default: return DDOG_CHARSLICE_C("BUG: Unknown value for ruby_value_type"); + } +} diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h index 23874282250..edd11e31954 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h @@ -23,3 +23,9 @@ inline static VALUE get_error_details_and_drop(ddog_Error *error) { ddog_Error_drop(error); return result; } + +// Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements. +// In practice, there's a few types that the profiler will probably never encounter, but I've added all entries of +// ruby_value_type that Ruby uses so that we can also use this for debugging. +const char *ruby_value_type_to_string(enum ruby_value_type type); +ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type); diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index bc436436c80..a2e831a349c 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -73,8 +73,8 @@ def sample_after_gc described_class::Testing._native_sample_after_gc(cpu_and_wall_time_collector) end - def sample_allocation(weight:) - described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight) + def sample_allocation(weight:, new_object: Object.new) + described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight, new_object) end def thread_list @@ -980,6 +980,46 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current expect(single_sample.labels.keys).to_not include(:end_timestamp_ns) end end + + { + T_OBJECT: Object.new, + T_CLASS: Object, + T_MODULE: Kernel, + T_FLOAT: 1.0, + T_STRING: 'Hello!', + T_REGEXP: /Hello/, + T_ARRAY: [], + T_HASH: {}, + T_STRUCT: Struct.new(:a).new, + T_BIGNUM: 2**256, + T_DATA: described_class.allocate, # ThreadContext is a T_DATA; we create here a dummy instance just as an example + T_MATCH: 'a'.match(Regexp.new('a')), + T_COMPLEX: Complex(1), + T_RATIONAL: 1/2r, + T_NIL: nil, + T_TRUE: true, + T_FALSE: false, + T_SYMBOL: :hello, + T_FIXNUM: 1, + }.each do |expected_type, object| + context "when sampling a #{expected_type}" do + it 'includes the correct ruby vm type for the passed object' do + sample_allocation(weight: 123, new_object: object) + + expect(single_sample.labels.fetch(:'ruby vm type')).to eq expected_type.to_s + end + end + end + + context 'when sampling a T_FILE' do + it 'includes the correct ruby vm type for the passed object' do + File.open(__FILE__) do |file| + sample_allocation(weight: 123, new_object: file) + end + + expect(single_sample.labels.fetch(:'ruby vm type')).to eq 'T_FILE' + end + end end describe '#thread_list' do