Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PROF-7307] Include 'ruby vm type' in profiler allocation samples #3074

Merged
merged 4 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,8 @@ static void trigger_sample_for_thread(
struct per_thread_context *thread_context,
sample_values values,
sample_type type,
long current_monotonic_wall_time_ns
long current_monotonic_wall_time_ns,
ddog_CharSlice *ruby_vm_type
);
static VALUE _native_thread_list(VALUE self);
static struct per_thread_context *get_or_create_context_for(VALUE thread, struct thread_context_collector_state *state);
Expand All @@ -199,7 +200,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
static bool is_type_web(VALUE root_span_type);
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
static VALUE thread_list(struct thread_context_collector_state *state);
static VALUE _native_sample_allocation(VALUE self, VALUE collector_instance, VALUE sample_weight);
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
static VALUE _native_new_empty_thread(VALUE self);

void collectors_thread_context_init(VALUE profiling_module) {
Expand All @@ -222,7 +223,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 2);
rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3);
rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1);
Expand Down Expand Up @@ -463,7 +464,8 @@ void update_metrics_and_sample(
thread_context,
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
SAMPLE_REGULAR,
current_monotonic_wall_time_ns
current_monotonic_wall_time_ns,
NULL
);
}

Expand Down Expand Up @@ -606,7 +608,8 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
thread_context,
(sample_values) {.cpu_time_ns = gc_cpu_time_elapsed_ns, .cpu_samples = 1, .wall_time_ns = gc_wall_time_elapsed_ns},
SAMPLE_IN_GC,
INVALID_TIME // For now we're not collecting timestamps for these events
INVALID_TIME, // For now we're not collecting timestamps for these events
NULL
);

// Mark thread as no longer in GC
Expand Down Expand Up @@ -637,13 +640,15 @@ static void trigger_sample_for_thread(
struct per_thread_context *thread_context,
sample_values values,
sample_type type,
long current_monotonic_wall_time_ns
long current_monotonic_wall_time_ns,
ddog_CharSlice *ruby_vm_type // Only used for allocation profiling, NULL for others; @ivoanjo: may want to refactor this at some point?
) {
int max_label_count =
1 + // thread id
1 + // thread name
1 + // profiler overhead
1 + // end_timestamp_ns
1 + // ruby vm type
2; // local root span id and span id
ddog_prof_Label labels[max_label_count];
int label_pos = 0;
Expand Down Expand Up @@ -713,6 +718,13 @@ static void trigger_sample_for_thread(
};
}

if (ruby_vm_type != NULL) {
labels[label_pos++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("ruby vm type"),
.str = *ruby_vm_type
};
}

// The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
// forgot to update max_label_count" in the future, we've also added this validation.
// @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone
Expand Down Expand Up @@ -1050,27 +1062,31 @@ static VALUE thread_list(struct thread_context_collector_state *state) {
return result;
}

void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight) {
void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object) {
struct thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

VALUE current_thread = rb_thread_current();

// Tag samples with the VM internal types
ddog_CharSlice ruby_vm_type = ruby_value_type_to_char_slice(rb_type(new_object));

trigger_sample_for_thread(
state,
/* thread: */ current_thread,
/* stack_from_thread: */ current_thread,
get_or_create_context_for(current_thread, state),
(sample_values) {.alloc_samples = sample_weight},
SAMPLE_REGULAR,
INVALID_TIME // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
INVALID_TIME, // For now we're not collecting timestamps for allocation events, as per profiling team internal discussions
&ruby_vm_type
);
}

// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight) {
thread_context_collector_sample_allocation(collector_instance, NUM2UINT(sample_weight));
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object) {
thread_context_collector_sample_allocation(collector_instance, NUM2UINT(sample_weight), new_object);
return Qtrue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ void thread_context_collector_sample(
long current_monotonic_wall_time_ns,
VALUE profiler_overhead_stack_thread
);
void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight);
void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight, VALUE new_object);
VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
void thread_context_collector_on_gc_start(VALUE self_instance);
void thread_context_collector_on_gc_finish(VALUE self_instance);
Expand Down
3 changes: 3 additions & 0 deletions ext/ddtrace_profiling_native_extension/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ def add_compiler_flag(flag)
# On older Rubies, there are no Ractors
$defs << '-DNO_RACTORS' if RUBY_VERSION < '3'

# On older Rubies, objects would not move
$defs << '-DNO_T_MOVED' if RUBY_VERSION < '2.7'

# On older Rubies, rb_global_vm_lock_struct did not include the owner field
$defs << '-DNO_GVL_OWNER' if RUBY_VERSION < '2.6'

Expand Down
42 changes: 42 additions & 0 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "libdatadog_helpers.h"

#include <ruby.h>

const char *ruby_value_type_to_string(enum ruby_value_type type) {
return ruby_value_type_to_char_slice(type).ptr;
}

ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type) {
switch (type) {
case(RUBY_T_NONE ): return DDOG_CHARSLICE_C("T_NONE");
case(RUBY_T_OBJECT ): return DDOG_CHARSLICE_C("T_OBJECT");
case(RUBY_T_CLASS ): return DDOG_CHARSLICE_C("T_CLASS");
case(RUBY_T_MODULE ): return DDOG_CHARSLICE_C("T_MODULE");
case(RUBY_T_FLOAT ): return DDOG_CHARSLICE_C("T_FLOAT");
case(RUBY_T_STRING ): return DDOG_CHARSLICE_C("T_STRING");
case(RUBY_T_REGEXP ): return DDOG_CHARSLICE_C("T_REGEXP");
case(RUBY_T_ARRAY ): return DDOG_CHARSLICE_C("T_ARRAY");
case(RUBY_T_HASH ): return DDOG_CHARSLICE_C("T_HASH");
case(RUBY_T_STRUCT ): return DDOG_CHARSLICE_C("T_STRUCT");
case(RUBY_T_BIGNUM ): return DDOG_CHARSLICE_C("T_BIGNUM");
case(RUBY_T_FILE ): return DDOG_CHARSLICE_C("T_FILE");
case(RUBY_T_DATA ): return DDOG_CHARSLICE_C("T_DATA");
case(RUBY_T_MATCH ): return DDOG_CHARSLICE_C("T_MATCH");
case(RUBY_T_COMPLEX ): return DDOG_CHARSLICE_C("T_COMPLEX");
case(RUBY_T_RATIONAL): return DDOG_CHARSLICE_C("T_RATIONAL");
case(RUBY_T_NIL ): return DDOG_CHARSLICE_C("T_NIL");
case(RUBY_T_TRUE ): return DDOG_CHARSLICE_C("T_TRUE");
case(RUBY_T_FALSE ): return DDOG_CHARSLICE_C("T_FALSE");
case(RUBY_T_SYMBOL ): return DDOG_CHARSLICE_C("T_SYMBOL");
case(RUBY_T_FIXNUM ): return DDOG_CHARSLICE_C("T_FIXNUM");
case(RUBY_T_UNDEF ): return DDOG_CHARSLICE_C("T_UNDEF");
case(RUBY_T_IMEMO ): return DDOG_CHARSLICE_C("T_IMEMO");
case(RUBY_T_NODE ): return DDOG_CHARSLICE_C("T_NODE");
case(RUBY_T_ICLASS ): return DDOG_CHARSLICE_C("T_ICLASS");
case(RUBY_T_ZOMBIE ): return DDOG_CHARSLICE_C("T_ZOMBIE");
#ifndef NO_T_MOVED
case(RUBY_T_MOVED ): return DDOG_CHARSLICE_C("T_MOVED");
#endif
default: return DDOG_CHARSLICE_C("BUG: Unknown value for ruby_value_type");
}
}
6 changes: 6 additions & 0 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,9 @@ inline static VALUE get_error_details_and_drop(ddog_Error *error) {
ddog_Error_drop(error);
return result;
}

// Used for pretty printing this Ruby enum. Returns "T_UNKNOWN_OR_MISSING_RUBY_VALUE_TYPE_ENTRY" for unknown elements.
// In practice, there's a few types that the profiler will probably never encounter, but I've added all entries of
// ruby_value_type that Ruby uses so that we can also use this for debugging.
const char *ruby_value_type_to_string(enum ruby_value_type type);
ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type);
44 changes: 42 additions & 2 deletions spec/datadog/profiling/collectors/thread_context_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def sample_after_gc
described_class::Testing._native_sample_after_gc(cpu_and_wall_time_collector)
end

def sample_allocation(weight:)
described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight)
def sample_allocation(weight:, new_object: Object.new)
described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight, new_object)
end

def thread_list
Expand Down Expand Up @@ -980,6 +980,46 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
expect(single_sample.labels.keys).to_not include(:end_timestamp_ns)
end
end

{
T_OBJECT: Object.new,
T_CLASS: Object,
T_MODULE: Kernel,
T_FLOAT: 1.0,
T_STRING: 'Hello!',
T_REGEXP: /Hello/,
T_ARRAY: [],
T_HASH: {},
T_STRUCT: Struct.new(:a).new,
T_BIGNUM: 2**256,
T_DATA: described_class.allocate, # ThreadContext is a T_DATA; we create here a dummy instance just as an example
T_MATCH: 'a'.match(Regexp.new('a')),
T_COMPLEX: Complex(1),
T_RATIONAL: 1/2r,
T_NIL: nil,
T_TRUE: true,
T_FALSE: false,
T_SYMBOL: :hello,
T_FIXNUM: 1,
}.each do |expected_type, object|
context "when sampling a #{expected_type}" do
it 'includes the correct ruby vm type for the passed object' do
sample_allocation(weight: 123, new_object: object)

expect(single_sample.labels.fetch(:'ruby vm type')).to eq expected_type.to_s
end
end
end

context 'when sampling a T_FILE' do
it 'includes the correct ruby vm type for the passed object' do
File.open(__FILE__) do |file|
sample_allocation(weight: 123, new_object: file)
end

expect(single_sample.labels.fetch(:'ruby vm type')).to eq 'T_FILE'
end
end
end

describe '#thread_list' do
Expand Down