[PROF-8667] Heap Profiling - Part 4 - Labels

AlexJF committed Dec 15, 2023
1 parent 6674f5c commit 18d37c7
Showing 9 changed files with 170 additions and 42 deletions.
@@ -1210,7 +1210,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
}
}

track_object(state->recorder_instance, new_object, sample_weight);
track_object(state->recorder_instance, new_object, sample_weight, class_name);

trigger_sample_for_thread(
state,
66 changes: 49 additions & 17 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c
@@ -4,6 +4,7 @@
#include "ruby_helpers.h"
#include <errno.h>
#include "collectors_stack.h"
#include "libdatadog_helpers.h"

#define MAX_QUEUE_LIMIT 10000

@@ -89,9 +90,10 @@ static void heap_record_free(heap_record*);
typedef struct {
long obj_id;
heap_record *heap_record;
live_object_data object_data;
// Has ownership of this, needs to clean-it-up if not transferred.
live_object_data *object_data;
} object_record;
static object_record* object_record_new(long, heap_record*, live_object_data);
static object_record* object_record_new(long, heap_record*, live_object_data*);
static void object_record_free(object_record*);

// Allows storing data passed to ::start_heap_allocation_recording to make it accessible to
@@ -100,14 +102,16 @@ static void object_record_free(object_record*);
// obj_id != 0 flags this struct as holding a valid partial heap recording.
typedef struct {
long obj_id;
live_object_data object_data;
// Has ownership of this, needs to clean-it-up if not transferred.
live_object_data *object_data;
} partial_heap_recording;

typedef struct {
// Has ownership of this, needs to clean-it-up if not transferred.
heap_record *heap_record;
long obj_id;
live_object_data object_data;
// Has ownership of this, needs to clean-it-up if not transferred.
live_object_data *object_data;
} uncommitted_sample;

struct heap_recorder {
@@ -141,9 +145,11 @@ static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_t);
static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
static void commit_allocation(heap_recorder*, heap_record*, long, live_object_data);
static void commit_allocation(heap_recorder*, heap_record*, long, live_object_data*);
static void flush_queue(heap_recorder*);
static bool enqueue_allocation(heap_recorder*, heap_record*, long, live_object_data);
static bool enqueue_allocation(heap_recorder*, heap_record*, long, live_object_data*);
static live_object_data* live_object_data_new(unsigned int, ddog_CharSlice, size_t);
static void live_object_data_free(live_object_data*);

// ==========================
// Heap Recorder External API
@@ -164,7 +170,7 @@ heap_recorder* heap_recorder_new(void) {
recorder->active_recording = (partial_heap_recording) {
.obj_id = 0, // 0 is actually the obj_id of false, but we'll never track that one in heap so we use
// it as invalid/unset value.
.object_data = {0},
.object_data = NULL,
};
recorder->queued_samples = ruby_xcalloc(MAX_QUEUE_LIMIT, sizeof(uncommitted_sample));
recorder->queued_samples_len = 0;
@@ -177,12 +183,22 @@ void heap_recorder_free(heap_recorder *heap_recorder) {
return;
}

// Clean-up all object records
st_foreach(heap_recorder->object_records, st_object_record_entry_free, 0);
st_free_table(heap_recorder->object_records);

// Clean-up all heap records (this includes those only referred to by queued_samples)
st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, 0);
st_free_table(heap_recorder->heap_records);

// Clean-up dangling object datas that are not part of object records yet
if (heap_recorder->active_recording.obj_id != 0) {
live_object_data_free(heap_recorder->active_recording.object_data);
}
for (size_t i = 0; i < heap_recorder->queued_samples_len; i++) {
live_object_data_free(heap_recorder->queued_samples[i].object_data);
}

pthread_mutex_destroy(&heap_recorder->records_mutex);

ruby_xfree(heap_recorder->queued_samples);
@@ -220,7 +236,7 @@ void heap_recorder_after_fork(heap_recorder *heap_recorder) {
heap_recorder->records_mutex = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;
}

void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) {
void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice alloc_class) {
if (heap_recorder == NULL) {
return;
}
@@ -232,9 +248,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj

heap_recorder->active_recording = (partial_heap_recording) {
.obj_id = FIX2LONG(ruby_obj_id),
.object_data = (live_object_data) {
.weight = weight,
},
.object_data = live_object_data_new(weight, alloc_class, rb_gc_count()),
};
}

@@ -265,6 +279,8 @@ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_pro
// We weren't able to get a lock
bool enqueued = enqueue_allocation(heap_recorder, heap_record, obj_id, active_recording->object_data);
if (!enqueued) {
// Need to clean up pending stuff
live_object_data_free(active_recording->object_data);
cleanup_heap_record_if_unused(heap_recorder, heap_record);
}
return;
@@ -443,8 +459,9 @@ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t *
return ST_CONTINUE;
}

// Expects transfer of ownership of object_data
// WARN: Expects records_mutex to be held
static void commit_allocation(heap_recorder *heap_recorder, heap_record *heap_record, long obj_id, live_object_data object_data) {
static void commit_allocation(heap_recorder *heap_recorder, heap_record *heap_record, long obj_id, live_object_data *object_data) {
// Update object_records
object_record_update_data update_data = (object_record_update_data) {
.heap_recorder = heap_recorder,
@@ -539,8 +556,9 @@ static void flush_queue(heap_recorder *heap_recorder) {
heap_recorder->queued_samples_len = 0;
}

// Expects transfer of ownership of object_data.
// WARN: This can get called during Ruby GC. NO HEAP ALLOCATIONS OR EXCEPTIONS ARE ALLOWED.
static bool enqueue_allocation(heap_recorder *heap_recorder, heap_record *heap_record, long obj_id, live_object_data object_data) {
static bool enqueue_allocation(heap_recorder *heap_recorder, heap_record *heap_record, long obj_id, live_object_data *object_data) {
if (heap_recorder->queued_samples_len >= MAX_QUEUE_LIMIT) {
// TODO: Some telemetry here?
return false;
@@ -555,6 +573,20 @@ static bool enqueue_allocation(heap_recorder *heap_recorder, heap_record *heap_r
return true;
}

static live_object_data* live_object_data_new(unsigned int weight, ddog_CharSlice alloc_class, size_t alloc_gen) {
live_object_data* object_data = ruby_xmalloc(sizeof(live_object_data));
(*object_data) = (live_object_data) {
.weight = weight,
.alloc_class = string_from_char_slice(alloc_class),
.alloc_gen = alloc_gen,
};
return object_data;
}

static void live_object_data_free(live_object_data *object_data) {
ruby_xfree(object_data->alloc_class);
// The struct itself comes from ruby_xmalloc in live_object_data_new, so it must be released here too.
ruby_xfree(object_data);
}

// ===============
// Heap Record API
// ===============
@@ -574,7 +606,7 @@ void heap_record_free(heap_record *record) {
// =================
// Object Record API
// =================
object_record* object_record_new(long obj_id, heap_record *heap_record, live_object_data object_data) {
object_record* object_record_new(long obj_id, heap_record *heap_record, live_object_data *object_data) {
object_record *record = ruby_xcalloc(1, sizeof(object_record));
record->obj_id = obj_id;
record->heap_record = heap_record;
@@ -583,6 +615,7 @@ object_record* object_record_new(long obj_id, heap_record *heap_record, live_obj
}

void object_record_free(object_record *record) {
live_object_data_free(record->object_data);
ruby_xfree(record);
}

@@ -637,7 +670,6 @@ st_index_t ddog_location_hash(ddog_prof_Location location, st_index_t seed) {
return hash;
}


// ==============
// Heap Stack API
// ==============
@@ -654,8 +686,8 @@ heap_stack* heap_stack_new(ddog_prof_Slice_Location locations) {
for (uint16_t i = 0; i < stack->frames_len; i++) {
const ddog_prof_Location *location = &locations.ptr[i];
stack->frames[i] = (heap_frame) {
.name = ruby_strndup(location->function.name.ptr, location->function.name.len),
.filename = ruby_strndup(location->function.filename.ptr, location->function.filename.len),
.name = string_from_char_slice(location->function.name),
.filename = string_from_char_slice(location->function.filename),
// ddog_prof_Location.line is an int64_t. We don't expect to have to profile files with more than
// 2M lines so this cast should be fairly safe?
.line = (int32_t) location->line,
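
A note on the ownership convention used in heap_recorder.c above: live_object_data is now heap-allocated, and every comment marked "Has ownership of this" means the holder must either hand the pointer off (to enqueue_allocation, commit_allocation or object_record_new) or free it with live_object_data_free. A minimal sketch of that rule, assuming heap_recorder, heap_record, obj_id, weight and alloc_class are already in scope (an illustration, not additional commit code):

live_object_data *object_data = live_object_data_new(weight, alloc_class, rb_gc_count());
if (!enqueue_allocation(heap_recorder, heap_record, obj_id, object_data)) {
  // Ownership was not transferred (e.g. the queue was full), so the caller still has to clean up.
  live_object_data_free(object_data);
  cleanup_heap_record_if_unused(heap_recorder, heap_record);
}
// On success, the pointer eventually reaches commit_allocation/object_record_new and is
// released later by object_record_free.
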
12 changes: 10 additions & 2 deletions ext/ddtrace_profiling_native_extension/heap_recorder.h
@@ -26,13 +26,21 @@ typedef struct live_object_data {
// Example: If we were sampling every 50 objects, then each sampled object
// could be seen as being representative of 50 objects.
unsigned int weight;

// The class of the object that we're tracking.
char* alloc_class;

// The GC allocation gen in which we saw this object being allocated.
//
// This enables us to calculate the age of this object in terms of GC executions.
size_t alloc_gen;
} live_object_data;

// Data that is made available to iterators of heap recorder data for each live object
// tracked therein.
typedef struct {
ddog_prof_Slice_Location locations;
live_object_data object_data;
const live_object_data *object_data;
} heap_recorder_iteration_data;

// Initialize a new heap recorder.
@@ -56,7 +64,7 @@ void heap_recorder_after_fork(heap_recorder *heap_recorder);
// The sampling weight of this object.
//
// WARN: It needs to be paired with a ::end_heap_allocation_recording call.
void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight);
void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice alloc_class);

// End a previously started heap allocation recording on the heap recorder.
//
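
The alloc_gen field added to live_object_data above stores the Ruby GC generation (rb_gc_count()) at allocation time; the object's age is then derived at serialization time. A sketch of that derivation, mirroring the stack_recorder.c changes later in this commit (object_data is assumed to point at a tracked live_object_data):

size_t profile_gen = rb_gc_count();                        // GC generation captured once per profile flush
size_t gc_gen_age = profile_gen - object_data->alloc_gen;  // object age measured in GC runs
// gc_gen_age is what ends up attached to each heap sample as the "gc gen age" label.
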
4 changes: 4 additions & 0 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.c
@@ -60,3 +60,7 @@ size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capa
ddog_Error_drop(error);
return error_msg_size;
}

char* string_from_char_slice(ddog_CharSlice slice) {
return ruby_strndup(slice.ptr, slice.len);
}
4 changes: 4 additions & 0 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.h
@@ -34,3 +34,7 @@ size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capa
// ruby_value_type that Ruby uses so that we can also use this for debugging.
const char *ruby_value_type_to_string(enum ruby_value_type type);
ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type);

// Returns a dynamically allocated string from the provided char slice.
// WARN: The returned string must be explicitly freed with ruby_xfree.
char* string_from_char_slice(ddog_CharSlice slice);
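
string_from_char_slice exists so callers can keep class names (and other slice-backed strings) beyond the lifetime of the slice they were read from. A minimal usage sketch under that assumption (the slice contents are illustrative, not from the commit):

ddog_CharSlice class_slice = DDOG_CHARSLICE_C("Object");
char *owned_class = string_from_char_slice(class_slice);  // heap-allocated, NUL-terminated copy
// ... store owned_class somewhere that outlives class_slice, e.g. a live_object_data ...
ruby_xfree(owned_class);  // per the WARN above, the copy must be freed explicitly
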
39 changes: 31 additions & 8 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
@@ -219,7 +219,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time);
static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint);
static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */);
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight);
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class);
static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations);
static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
@@ -247,7 +247,7 @@ void stack_recorder_init(VALUE profiling_module) {
rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1);
rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1);
rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3);
rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3);
rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 4);
rb_define_singleton_method(testing_module, "_native_check_heap_hashes", _native_check_heap_hashes, 1);
rb_define_singleton_method(testing_module, "_native_start_fake_slow_heap_serialization",
_native_start_fake_slow_heap_serialization, 1);
@@ -510,13 +510,13 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
}
}

void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) {
void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice alloc_class) {
struct stack_recorder_state *state;
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
// FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
// very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
// be fixed with some refactoring but for now this leads to a less impactful change.
start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight);
start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight, alloc_class);
}

void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) {
@@ -541,8 +541,11 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
typedef struct heap_recorder_iteration_context {
struct stack_recorder_state *state;
ddog_prof_Profile *profile;

bool error;
char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG];

size_t profile_gen;
} heap_recorder_iteration_context;

static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) {
Expand All @@ -551,14 +554,33 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0};
uint8_t *position_for = context->state->position_for;

metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data.weight;
metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data->weight;

const live_object_data *object_data = iteration_data.object_data;
ddog_prof_Label labels[2];
size_t num_labels = 2;

labels[0] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("allocation class"),
.str = (ddog_CharSlice) {
.ptr = object_data->alloc_class,
.len = strlen(object_data->alloc_class),
},
};
labels[1] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("gc gen age"),
.num = context->profile_gen - object_data->alloc_gen,
};

ddog_prof_Profile_Result result = ddog_prof_Profile_add(
context->profile,
(ddog_prof_Sample) {
.locations = iteration_data.locations,
.values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count},
.labels = {0},
.labels = (ddog_prof_Slice_Label) {
.ptr = labels,
.len = num_labels,
}
},
0
);
Expand All @@ -580,6 +602,7 @@ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, d
.profile = profile,
.error = false,
.error_msg = {0},
.profile_gen = rb_gc_count(),
};
heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context, false);
if (iteration_context.error) {
@@ -741,9 +764,9 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_
return Qtrue;
}

static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) {
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class) {
ENFORCE_TYPE(weight, T_FIXNUM);
track_object(recorder_instance, new_obj, NUM2UINT(weight));
track_object(recorder_instance, new_obj, NUM2UINT(weight), char_slice_from_ruby_string(alloc_class));
return Qtrue;
}

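
The labels built in add_heap_sample_to_active_profile_without_gvl above use the two value shapes of ddog_prof_Label seen in this commit: a string value (.str) for "allocation class" and a numeric value (.num) for "gc gen age". A standalone sketch of those two shapes, with illustrative values only:

ddog_prof_Label string_label = {
  .key = DDOG_CHARSLICE_C("allocation class"),
  .str = DDOG_CHARSLICE_C("Object"),   // string-valued label
};
ddog_prof_Label numeric_label = {
  .key = DDOG_CHARSLICE_C("gc gen age"),
  .num = 3,                            // numeric-valued label: profile_gen - alloc_gen
};
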
2 changes: 1 addition & 1 deletion ext/ddtrace_profiling_native_extension/stack_recorder.h
@@ -22,5 +22,5 @@ typedef struct sample_labels {

void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels);
void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight);
void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice alloc_class);
VALUE enforce_recorder_instance(VALUE object);
@@ -572,10 +572,6 @@
it 'records live heap objects' do
stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))

# Warm this up to remove VM-related allocations
# TODO: Remove this when we can match on allocation class
CpuAndWallTimeWorkerSpec::TestStruct.new

start

live_objects = Array.new(test_num_allocated_object)
@@ -587,7 +583,10 @@

test_struct_heap_sample = lambda { |sample|
first_frame = sample.locations.first
first_frame.lineno == allocation_line && first_frame.path == __FILE__ && first_frame.base_label == 'new' &&
first_frame.lineno == allocation_line &&
first_frame.path == __FILE__ &&
first_frame.base_label == 'new' &&
sample.labels[:'allocation class'] == 'CpuAndWallTimeWorkerSpec::TestStruct' &&
(sample.values[:'heap-live-samples'] || 0) > 0
}
