-
Notifications
You must be signed in to change notification settings - Fork 373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PROF-8917] Add support for the libdatadog crash tracker #3384
Changes from all commits
e1340de
789db6b
8afc9f2
a9c51b0
d89e876
d3bc290
4889d05
37f8082
18758de
02d8691
bc7d72c
5312591
45f5daa
a5c0ba2
fb45524
6f83a4a
6af57da
d19ecde
e670699
d37cbb8
d8b4122
6f3f8be
66e8d95
ce15ade
1b501cc
bd0537e
0b40fbf
a3081b4
680c4a3
32f02b4
356002e
7776107
520c8ab
a6ddf72
3c27b38
ed971a5
f029683
7083455
49e9f31
5197792
7cda332
b87f171
3acf413
bd7de08
09976cc
5431d2b
a2ae730
42f6ca5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#include <ruby.h> | ||
#include <datadog/common.h> | ||
#include <libdatadog_helpers.h> | ||
|
||
static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self); | ||
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self); | ||
|
||
// Used to report Ruby VM crashes. | ||
// Once initialized, segfaults will be reported automatically using libdatadog. | ||
|
||
void crashtracker_init(VALUE profiling_module) { | ||
VALUE crashtracker_class = rb_define_class_under(profiling_module, "Crashtracker", rb_cObject); | ||
|
||
rb_define_singleton_method(crashtracker_class, "_native_start_or_update_on_fork", _native_start_or_update_on_fork, -1); | ||
rb_define_singleton_method(crashtracker_class, "_native_stop", _native_stop, 0); | ||
} | ||
|
||
static VALUE _native_start_or_update_on_fork(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) { | ||
VALUE options; | ||
rb_scan_args(argc, argv, "0:", &options); | ||
|
||
VALUE exporter_configuration = rb_hash_fetch(options, ID2SYM(rb_intern("exporter_configuration"))); | ||
VALUE path_to_crashtracking_receiver_binary = rb_hash_fetch(options, ID2SYM(rb_intern("path_to_crashtracking_receiver_binary"))); | ||
VALUE ld_library_path = rb_hash_fetch(options, ID2SYM(rb_intern("ld_library_path"))); | ||
VALUE tags_as_array = rb_hash_fetch(options, ID2SYM(rb_intern("tags_as_array"))); | ||
VALUE action = rb_hash_fetch(options, ID2SYM(rb_intern("action"))); | ||
VALUE upload_timeout_seconds = rb_hash_fetch(options, ID2SYM(rb_intern("upload_timeout_seconds"))); | ||
|
||
VALUE start_action = ID2SYM(rb_intern("start")); | ||
VALUE update_on_fork_action = ID2SYM(rb_intern("update_on_fork")); | ||
|
||
ENFORCE_TYPE(exporter_configuration, T_ARRAY); | ||
ENFORCE_TYPE(tags_as_array, T_ARRAY); | ||
ENFORCE_TYPE(path_to_crashtracking_receiver_binary, T_STRING); | ||
ENFORCE_TYPE(ld_library_path, T_STRING); | ||
ENFORCE_TYPE(action, T_SYMBOL); | ||
ENFORCE_TYPE(upload_timeout_seconds, T_FIXNUM); | ||
|
||
if (action != start_action && action != update_on_fork_action) rb_raise(rb_eArgError, "Unexpected action: %+"PRIsVALUE, action); | ||
|
||
VALUE version = ddtrace_version(); | ||
ddog_prof_Endpoint endpoint = endpoint_from(exporter_configuration); | ||
|
||
// Tags are heap-allocated, so after here we can't raise exceptions otherwise we'll leak this memory | ||
// Start of exception-free zone to prevent leaks {{ | ||
ddog_Vec_Tag tags = convert_tags(tags_as_array); | ||
|
||
ddog_prof_CrashtrackerConfiguration config = { | ||
.additional_files = {}, | ||
// The Ruby VM already uses an alt stack to detect stack overflows so the crash handler must not overwrite it. | ||
// | ||
// @ivoanjo: Specifically, with `create_alt_stack = true` I saw a segfault, such as Ruby 2.6's bug with | ||
// "Process.detach(fork { exit! }).instance_variable_get(:@foo)" being turned into a | ||
// "-e:1:in `instance_variable_get': stack level too deep (SystemStackError)" by Ruby. | ||
// | ||
// The Ruby crash handler also seems to get confused when this option is enabled and | ||
// "Process.kill('SEGV', Process.pid)" gets run. | ||
.create_alt_stack = false, | ||
.endpoint = endpoint, | ||
.resolve_frames = DDOG_PROF_STACKTRACE_COLLECTION_ENABLED, | ||
.timeout_secs = FIX2INT(upload_timeout_seconds), | ||
}; | ||
|
||
ddog_prof_CrashtrackerMetadata metadata = { | ||
.profiling_library_name = DDOG_CHARSLICE_C("dd-trace-rb"), | ||
.profiling_library_version = char_slice_from_ruby_string(version), | ||
.family = DDOG_CHARSLICE_C("ruby"), | ||
.tags = &tags, | ||
}; | ||
|
||
ddog_prof_EnvVar ld_library_path_env = { | ||
.key = DDOG_CHARSLICE_C("LD_LIBRARY_PATH"), | ||
.val = char_slice_from_ruby_string(ld_library_path), | ||
}; | ||
|
||
ddog_prof_CrashtrackerReceiverConfig receiver_config = { | ||
.args = {}, | ||
.env = {.ptr = &ld_library_path_env, .len = 1}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this might override the env rather than appending. Which would you expect to happen? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question -- I don't particularly have a preference. On one side, inheriting the full env would be useful if the crash tracker wanted to log some extra info from there. On the other hand, we can always add anything else we want extra later. Dealer's choice? Do you think it'd be useful for me to pass along the env that Ruby has + with the addition of the ld_library_path? |
||
.path_to_receiver_binary = char_slice_from_ruby_string(path_to_crashtracking_receiver_binary), | ||
.optional_stderr_filename = {}, | ||
.optional_stdout_filename = {}, | ||
}; | ||
|
||
ddog_prof_CrashtrackerResult result = | ||
action == start_action ? | ||
ddog_prof_Crashtracker_init(config, receiver_config, metadata) : | ||
ddog_prof_Crashtracker_update_on_fork(config, receiver_config, metadata); | ||
|
||
// Clean up before potentially raising any exceptions | ||
ddog_Vec_Tag_drop(tags); | ||
ivoanjo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// }} End of exception-free zone to prevent leaks | ||
|
||
if (result.tag == DDOG_PROF_CRASHTRACKER_RESULT_ERR) { | ||
rb_raise(rb_eRuntimeError, "Failed to start/update the crash tracker: %"PRIsVALUE, get_error_details_and_drop(&result.err)); | ||
} | ||
|
||
return Qtrue; | ||
} | ||
|
||
static VALUE _native_stop(DDTRACE_UNUSED VALUE _self) { | ||
ddog_prof_CrashtrackerResult result = ddog_prof_Crashtracker_shutdown(); | ||
|
||
if (result.tag == DDOG_PROF_CRASHTRACKER_RESULT_ERR) { | ||
rb_raise(rb_eRuntimeError, "Failed to stop the crash tracker: %"PRIsVALUE, get_error_details_and_drop(&result.err)); | ||
} | ||
|
||
return Qtrue; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,8 @@ | |
|
||
#include <ruby.h> | ||
|
||
static VALUE log_failure_to_process_tag(VALUE err_details); | ||
|
||
const char *ruby_value_type_to_string(enum ruby_value_type type) { | ||
return ruby_value_type_to_char_slice(type).ptr; | ||
} | ||
|
@@ -60,3 +62,87 @@ size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capa | |
ddog_Error_drop(error); | ||
return error_msg_size; | ||
} | ||
|
||
__attribute__((warn_unused_result)) | ||
ddog_prof_Endpoint endpoint_from(VALUE exporter_configuration) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you want a way to send to file endpoints? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can imagine some uses for it in some weird debugging cases... 🤔 From my side, I'm fine with not having that ability, so I don't particularly think it's worth the effort. |
||
ENFORCE_TYPE(exporter_configuration, T_ARRAY); | ||
|
||
VALUE exporter_working_mode = rb_ary_entry(exporter_configuration, 0); | ||
ENFORCE_TYPE(exporter_working_mode, T_SYMBOL); | ||
ID working_mode = SYM2ID(exporter_working_mode); | ||
|
||
ID agentless_id = rb_intern("agentless"); | ||
ID agent_id = rb_intern("agent"); | ||
|
||
if (working_mode != agentless_id && working_mode != agent_id) { | ||
rb_raise(rb_eArgError, "Failed to initialize transport: Unexpected working mode, expected :agentless or :agent"); | ||
} | ||
|
||
if (working_mode == agentless_id) { | ||
VALUE site = rb_ary_entry(exporter_configuration, 1); | ||
VALUE api_key = rb_ary_entry(exporter_configuration, 2); | ||
ENFORCE_TYPE(site, T_STRING); | ||
ENFORCE_TYPE(api_key, T_STRING); | ||
|
||
return ddog_prof_Endpoint_agentless(char_slice_from_ruby_string(site), char_slice_from_ruby_string(api_key)); | ||
} else { // agent_id | ||
VALUE base_url = rb_ary_entry(exporter_configuration, 1); | ||
ENFORCE_TYPE(base_url, T_STRING); | ||
|
||
return ddog_prof_Endpoint_agent(char_slice_from_ruby_string(base_url)); | ||
} | ||
} | ||
|
||
__attribute__((warn_unused_result)) | ||
ddog_Vec_Tag convert_tags(VALUE tags_as_array) { | ||
ENFORCE_TYPE(tags_as_array, T_ARRAY); | ||
|
||
long tags_count = RARRAY_LEN(tags_as_array); | ||
ddog_Vec_Tag tags = ddog_Vec_Tag_new(); | ||
|
||
for (long i = 0; i < tags_count; i++) { | ||
VALUE name_value_pair = rb_ary_entry(tags_as_array, i); | ||
|
||
if (!RB_TYPE_P(name_value_pair, T_ARRAY)) { | ||
ddog_Vec_Tag_drop(tags); | ||
ENFORCE_TYPE(name_value_pair, T_ARRAY); | ||
} | ||
|
||
// Note: We can index the array without checking its size first because rb_ary_entry returns Qnil if out of bounds | ||
VALUE tag_name = rb_ary_entry(name_value_pair, 0); | ||
VALUE tag_value = rb_ary_entry(name_value_pair, 1); | ||
|
||
if (!(RB_TYPE_P(tag_name, T_STRING) && RB_TYPE_P(tag_value, T_STRING))) { | ||
ddog_Vec_Tag_drop(tags); | ||
ENFORCE_TYPE(tag_name, T_STRING); | ||
ENFORCE_TYPE(tag_value, T_STRING); | ||
} | ||
|
||
ddog_Vec_Tag_PushResult push_result = | ||
ddog_Vec_Tag_push(&tags, char_slice_from_ruby_string(tag_name), char_slice_from_ruby_string(tag_value)); | ||
|
||
if (push_result.tag == DDOG_VEC_TAG_PUSH_RESULT_ERR) { | ||
// libdatadog validates tags and may catch invalid tags that ddtrace didn't actually catch. | ||
// We warn users about such tags, and then just ignore them. | ||
|
||
int exception_state; | ||
rb_protect(log_failure_to_process_tag, get_error_details_and_drop(&push_result.err), &exception_state); | ||
|
||
// Since we are calling into Ruby code, it may raise an exception. Ensure that dynamically-allocated tags | ||
// get cleaned before propagating the exception. | ||
if (exception_state) { | ||
ddog_Vec_Tag_drop(tags); | ||
rb_jump_tag(exception_state); // "Re-raise" exception | ||
} | ||
} | ||
} | ||
|
||
return tags; | ||
} | ||
|
||
static VALUE log_failure_to_process_tag(VALUE err_details) { | ||
VALUE datadog_module = rb_const_get(rb_cObject, rb_intern("Datadog")); | ||
VALUE logger = rb_funcall(datadog_module, rb_intern("logger"), 0); | ||
|
||
return rb_funcall(logger, rb_intern("warn"), 1, rb_sprintf("Failed to add tag to profiling request: %"PRIsVALUE, err_details)); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense, this is why this is an option here :)