From dd868ee847956d1a78961783908c2a5c797080ad Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Thu, 24 Oct 2024 21:27:48 +0000 Subject: [PATCH 1/5] fix(profiling): fix data race when accessing span for thread The ThreadSpanLinks singleton holds the active span (if one exists) for a given thread ID. The get_active_span_from_thread_id member function returns a pointer to the active span for a thread. The link_span member function sets the active span for a thread. get_active_span_from_thread_id accesses the map of spans under a mutex, but returns the pointer after releasing the mutex, meaning link_span can modify the members of the Span while the caller of get_active_span_from_thread_id is reading them. Fix this by returning a copy of the Span. Use a std::optional to wrap the return value of get_active_span_from_thread_id, rather than returning a pointer. We want to tell whether or not there actually was a span associated with the thread, but returning a pointer would require us to heap allocate the copy of the Span. --- .../profiling/stack_v2/include/thread_span_links.hpp | 3 ++- .../datadog/profiling/stack_v2/src/stack_renderer.cpp | 4 ++-- .../profiling/stack_v2/src/thread_span_links.cpp | 11 +++++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/thread_span_links.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/thread_span_links.hpp index 061cb123c03..765eaa72c91 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/include/thread_span_links.hpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/include/thread_span_links.hpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -37,7 +38,7 @@ class ThreadSpanLinks void link_span(uint64_t thread_id, uint64_t span_id, uint64_t local_root_span_id, std::string span_type); - const Span* get_active_span_from_thread_id(uint64_t thread_id); + const std::optional get_active_span_from_thread_id(uint64_t thread_id); static void postfork_child(); diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp index 8abc37ee55a..9a07da3aac2 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp @@ -55,8 +55,8 @@ StackRenderer::render_thread_begin(PyThreadState* tstate, ddup_push_threadinfo(sample, static_cast(thread_id), static_cast(native_id), name); ddup_push_walltime(sample, thread_state.wall_time_ns, 1); - const Span* active_span = ThreadSpanLinks::get_instance().get_active_span_from_thread_id(thread_id); - if (active_span != nullptr) { + const std::optional active_span = ThreadSpanLinks::get_instance().get_active_span_from_thread_id(thread_id); + if (active_span) { ddup_push_span_id(sample, active_span->span_id); ddup_push_local_root_span_id(sample, active_span->local_root_span_id); ddup_push_trace_type(sample, std::string_view(active_span->span_type)); diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/thread_span_links.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/thread_span_links.cpp index 602aad9fdb3..80c5ef06bc1 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/src/thread_span_links.cpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/thread_span_links.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -19,15 +20,17 @@ ThreadSpanLinks::link_span(uint64_t thread_id, uint64_t span_id, uint64_t local_ thread_id_to_span[thread_id]->span_type = span_type; } -const Span* +const std::optional ThreadSpanLinks::get_active_span_from_thread_id(uint64_t thread_id) { std::lock_guard lock(mtx); - if (thread_id_to_span.find(thread_id) == thread_id_to_span.end()) { - return nullptr; + std::optional span; + auto it = thread_id_to_span.find(thread_id); + if (it != thread_id_to_span.end()) { + span = *(it->second); } - return thread_id_to_span[thread_id].get(); + return span; } void From 91284fdc01860b6008f390ab1ae7f3e875a2c3a0 Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Mon, 28 Oct 2024 19:52:37 +0000 Subject: [PATCH 2/5] fix: add a test for the thread-span link data race The test is pretty simplistic, just creates new span_type strings and links them to a "thread" in one thread, and reads out the span_type for that "thread" in another thread. This fails reliably for me when built with the thread sanitizer enabled without the data race fix, and passes reliably with the fix. --- .../datadog/profiling/stack_v2/CMakeLists.txt | 5 +++ .../profiling/stack_v2/test/CMakeLists.txt | 25 +++++++++++ .../stack_v2/test/thread_span_links.cpp | 44 +++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt create mode 100644 ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp diff --git a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt index 6c330c9c970..ca9d936b705 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt @@ -117,3 +117,8 @@ if(LIB_INSTALL_DIR) ARCHIVE DESTINATION ${LIB_INSTALL_DIR} RUNTIME DESTINATION ${LIB_INSTALL_DIR}) endif() + +if(BUILD_TESTING) + enable_testing() + add_subdirectory(test) +endif() \ No newline at end of file diff --git a/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt new file mode 100644 index 00000000000..23fcda3eedb --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/test/CMakeLists.txt @@ -0,0 +1,25 @@ +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.11.0) +set(gtest_force_shared_crt + ON + CACHE BOOL "" FORCE) +set(INSTALL_GTEST + OFF + CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) +include(GoogleTest) +include(AnalysisFunc) + +function(dd_wrapper_add_test name) + add_executable(${name} ${ARGN}) + target_include_directories(${name} PRIVATE ../include) + target_link_libraries(${name} PRIVATE gmock gtest_main _stack_v2) + add_ddup_config(${name}) + + gtest_discover_tests(${name}) +endfunction() + +# Add the tests +dd_wrapper_add_test(thread_span_links thread_span_links.cpp) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp b/ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp new file mode 100644 index 00000000000..229fa71a305 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp @@ -0,0 +1,44 @@ +#include + +#include +#include + +#include "thread_span_links.hpp" + +static void get() { + for (int i = 0; i < 100; i++) { + std::string span_type; + for (int j = 0; j < i; j++) { + span_type.append("a"); + } + Datadog::ThreadSpanLinks::get_instance().link_span(42, 1, 2, span_type); + } +} + +static std::string set() { + std::string s; + for (int i = 0; i < 100; i++) { + auto thing = Datadog::ThreadSpanLinks::get_instance().get_active_span_from_thread_id(42); + if (!thing) { + continue; + } + s = thing->span_type; + } + return s; +} + +TEST(ThreadSpanLinksConcurrency, GetSetRace) +{ + std::thread t1(get); + std::thread t2(set); + t1.join(); + t2.join(); +} + +int +main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + (void)(::testing::GTEST_FLAG(death_test_style) = "threadsafe"); + return RUN_ALL_TESTS(); +} From 99c8fd6e615ec48f5af30d5fadc483f3312e8b73 Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Tue, 29 Oct 2024 14:57:17 +0000 Subject: [PATCH 3/5] chore: clang-format --- .../datadog/profiling/stack_v2/test/thread_span_links.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp b/ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp index 229fa71a305..b668c3b3d80 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp +++ b/ddtrace/internal/datadog/profiling/stack_v2/test/thread_span_links.cpp @@ -5,7 +5,9 @@ #include "thread_span_links.hpp" -static void get() { +static void +get() +{ for (int i = 0; i < 100; i++) { std::string span_type; for (int j = 0; j < i; j++) { @@ -15,7 +17,9 @@ static void get() { } } -static std::string set() { +static std::string +set() +{ std::string s; for (int i = 0; i < 100; i++) { auto thing = Datadog::ThreadSpanLinks::get_instance().get_active_span_from_thread_id(42); From d42afe541226dbdf2e6db58b08e43a5816bb907e Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Tue, 29 Oct 2024 15:03:55 +0000 Subject: [PATCH 4/5] chore: add release note --- .../profiling-fix-data-race-segfault-b14f59c06c6bf9ed.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 releasenotes/notes/profiling-fix-data-race-segfault-b14f59c06c6bf9ed.yaml diff --git a/releasenotes/notes/profiling-fix-data-race-segfault-b14f59c06c6bf9ed.yaml b/releasenotes/notes/profiling-fix-data-race-segfault-b14f59c06c6bf9ed.yaml new file mode 100644 index 00000000000..0dcae82aea8 --- /dev/null +++ b/releasenotes/notes/profiling-fix-data-race-segfault-b14f59c06c6bf9ed.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + profiling: fix a data race where span information associated with a thread + was read and updated concurrently, leading to segfaults From 058697209def32709784b3de8b215f638a3900e2 Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Tue, 29 Oct 2024 11:34:39 -0400 Subject: [PATCH 5/5] chore: add trailing newline --- ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt index ca9d936b705..19f3955dab0 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt @@ -121,4 +121,5 @@ endif() if(BUILD_TESTING) enable_testing() add_subdirectory(test) -endif() \ No newline at end of file +endif() +