From 152a0ad3dff30bd4550dbbceb3d2296b4e61f55d Mon Sep 17 00:00:00 2001 From: Jun Wu Date: Fri, 7 Oct 2022 10:36:00 -0700 Subject: [PATCH] change logging in exit handlers to not avoid locking Summary: We have seen deadlock running `terminationHandler` -> `hasSubscribers` in 2 threads. It's unclear which other thread is holding the lock. To make things easier to debug next time, let's change terminationHandler (and also main.cpp) to bypass the logging lock and write to stderr directly. Related threads (all threads in P536343453): Thread 11 (LWP 3275661): #0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38 #1 0x0000000001cc995b in folly::detail::(anonymous namespace)::nativeFutexWaitImpl (addr=, expected=, absSystemTime=, absSteadyTime=, waitMask=) at fbcode/folly/detail/Futex.cpp:126 #2 folly::detail::futexWaitImpl (futex=0x89, futex@entry=0x7f1c3ac2ef90, expected=994748889, absSystemTime=absSystemTime@entry=0x0, absSteadyTime=, absSteadyTime@entry=0x0, waitMask=waitMask@entry=1) at fbcode/folly/detail/Futex.cpp:254 #3 0x0000000001d34bce in folly::detail::futexWait > (futex=0x7f1c3ac2ef90, expected=137, waitMask=1) at buck-out/v2/gen/fbcode/110b607930331a92/folly/detail/__futex__/headers/folly/detail/Futex-inl.h:96 #4 folly::SharedMutexImpl::WaitForever::doWait (this=, futex=..., expected=137, waitMask=1) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:718 #5 folly::SharedMutexImpl::futexWaitForZeroBits::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7f1c149f88e4: 118379409, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1184 #6 0x0000000001cd42b2 in folly::SharedMutexImpl::yieldWaitForZeroBits::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7f1c149f88e4: 118379409, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1151 #7 folly::SharedMutexImpl::waitForZeroBits::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7f1c149f88e4: 118379409, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1109 #8 0x0000000001e7e14c in folly::SharedMutexImpl::lockSharedImpl::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7f1c149f88e4: 118379409, token=0x0, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1664 #9 folly::SharedMutexImpl::lockSharedImpl::WaitForever> (this=0x7f1c3ac2ef90, token=0x0, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1356 #10 folly::SharedMutexImpl::lock_shared (this=0x7f1c3ac2ef90) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:495 #11 std::shared_lock >::shared_lock (this=, __m=...) at fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/shared_mutex:727 #12 0x0000000002d765fd in folly::LockedPtr > const, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0> >::doLock, std::shared_lock >, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0>, 0> (mutex=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:1493 #13 folly::LockedPtr > const, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0> >::LockedPtr (this=0x7f1c149f8928, parent=) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:1272 #14 folly::SynchronizedBase >, (folly::detail::SynchronizedMutexLevel)2>::rlock (this=) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:229 #15 watchman::Publisher::hasSubscribers (this=) at fbcode/watchman/PubSub.cpp:117 #16 0x0000000002eca798 in watchman::Log::log (this=, level=level@entry=watchman::ABORT, args=..., args=..., args=...) at buck-out/v2/gen/fbcode/110b607930331a92/watchman/__logging__/headers/watchman/Logging.h:42 #17 0x0000000002ec9ba7 in watchman::log (level=watchman::ABORT, args=..., args=..., args=...) at buck-out/v2/gen/fbcode/110b607930331a92/watchman/__logging__/headers/watchman/Logging.h:121 #18 (anonymous namespace)::terminationHandler () at fbcode/watchman/SignalHandler.cpp:159 #19 0x00007f1c3b0c7b3a in __cxxabiv1::__terminate (handler=) at ../../.././libstdc++-v3/libsupc++/eh_terminate.cc:48 #20 0x00007f1c3b0c7ba5 in std::terminate () at ../../.././libstdc++-v3/libsupc++/eh_terminate.cc:58 #21 0x0000000001c38c8b in __clang_call_terminate () #22 0x0000000003284c9e in folly::detail::terminate_with_, std::allocator >&> (args=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/lang/__exception__/headers/folly/lang/Exception.h:93 #23 0x0000000003281bae in folly::terminate_with, std::allocator >&> (args=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/lang/__exception__/headers/folly/lang/Exception.h:123 #24 folly::SingletonVault::fireShutdownTimer (this=) at fbcode/folly/Singleton.cpp:499 #25 0x0000000003281ad9 in folly::(anonymous namespace)::fireShutdownSignalHelper (sigval=...) at fbcode/folly/Singleton.cpp:454 #26 0x00007f1c3b42b939 in timer_sigev_thread (arg=) at ../sysdeps/unix/sysv/linux/timer_routines.c:55 #27 0x00007f1c3b41fc0f in start_thread (arg=) at pthread_create.c:434 #28 0x00007f1c3b4b21dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 ... Thread 1 (LWP 3201992): #0 syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38 #1 0x0000000001cc995b in folly::detail::(anonymous namespace)::nativeFutexWaitImpl (addr=, expected=, absSystemTime=, absSteadyTime=, waitMask=) at fbcode/folly/detail/Futex.cpp:126 #2 folly::detail::futexWaitImpl (futex=0x89, futex@entry=0x7f1c3ac2ef90, expected=994748889, absSystemTime=absSystemTime@entry=0x0, absSteadyTime=, absSteadyTime@entry=0x0, waitMask=waitMask@entry=1) at fbcode/folly/detail/Futex.cpp:254 #3 0x0000000001d34bce in folly::detail::futexWait > (futex=0x7f1c3ac2ef90, expected=137, waitMask=1) at buck-out/v2/gen/fbcode/110b607930331a92/folly/detail/__futex__/headers/folly/detail/Futex-inl.h:96 #4 folly::SharedMutexImpl::WaitForever::doWait (this=, futex=..., expected=137, waitMask=1) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:718 #5 folly::SharedMutexImpl::futexWaitForZeroBits::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7ffd2d5be924: 118379408, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1184 #6 0x0000000001cd42b2 in folly::SharedMutexImpl::yieldWaitForZeroBits::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7ffd2d5be924: 118379408, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1151 #7 folly::SharedMutexImpl::waitForZeroBits::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7ffd2d5be924: 118379408, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1109 #8 0x0000000001e7e14c in folly::SharedMutexImpl::lockSharedImpl::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7ffd2d5be924: 118379408, token=0x0, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1664 #9 folly::SharedMutexImpl::lockSharedImpl::WaitForever> (this=0x7f1c3ac2ef90, token=0x0, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1356 #10 folly::SharedMutexImpl::lock_shared (this=0x7f1c3ac2ef90) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:495 #11 std::shared_lock >::shared_lock (this=, __m=...) at fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/shared_mutex:727 #12 0x0000000002d765fd in folly::LockedPtr > const, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0> >::doLock, std::shared_lock >, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0>, 0> (mutex=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:1493 #13 folly::LockedPtr > const, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0> >::LockedPtr (this=0x7ffd2d5be968, parent=) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:1272 #14 folly::SynchronizedBase >, (folly::detail::SynchronizedMutexLevel)2>::rlock (this=) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:229 #15 watchman::Publisher::hasSubscribers (this=) at fbcode/watchman/PubSub.cpp:117 #16 0x0000000002ecac20 in watchman::Log::log (this=, level=level@entry=watchman::ABORT, args=...) at buck-out/v2/gen/fbcode/110b607930331a92/watchman/__logging__/headers/watchman/Logging.h:42 #17 0x0000000002ec9b24 in watchman::log (level=watchman::ABORT, args=...) at buck-out/v2/gen/fbcode/110b607930331a92/watchman/__logging__/headers/watchman/Logging.h:121 #18 (anonymous namespace)::terminationHandler () at fbcode/watchman/SignalHandler.cpp:165 #19 0x00007f1c3b0c7b3a in __cxxabiv1::__terminate (handler=) at ../../.././libstdc++-v3/libsupc++/eh_terminate.cc:48 #20 0x00007f1c3b0c7ba5 in std::terminate () at ../../.././libstdc++-v3/libsupc++/eh_terminate.cc:58 #21 0x0000000002d8cde1 in std::thread::~thread (this=0x7f1c3ac2ef90) at fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/std_thread.h:152 #22 0x00007f1c3b3cc8f8 in __run_exit_handlers (status=1, listp=0x7f1c3b598658 <__exit_funcs>, run_list_atexit=, run_dtors=) at exit.c:113 #23 0x00007f1c3b3cca0a in __GI_exit (status=) at exit.c:143 #24 0x00007f1c3b3b165e in __libc_start_call_main (main=0x2d11220 , argc=2, argv=0x7ffd2d5bec78) at ../sysdeps/nptl/libc_start_call_main.h:74 #25 0x00007f1c3b3b1718 in __libc_start_main_impl (main=0x2d11220 , argc=2, argv=0x7ffd2d5bec78, init=, fini=, rtld_fini=, stack_end=0x7ffd2d5bec68) at ../csu/libc-start.c:409 #26 0x0000000002d0e181 in _start () at ../sysdeps/x86_64/start.S:116 Reviewed By: xavierd Differential Revision: D40166374 fbshipit-source-id: 7017e20234e5e0a9532eb61a63ac49ac0020d443 --- watchman/Logging.h | 8 ++++++++ watchman/SignalHandler.cpp | 15 +++++++-------- watchman/main.cpp | 8 +++----- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/watchman/Logging.h b/watchman/Logging.h index 6585b5cbc811..c159728f671b 100644 --- a/watchman/Logging.h +++ b/watchman/Logging.h @@ -129,6 +129,14 @@ void logf(LogLevel level, fmt::string_view format_str, Args&&... args) { getLog().logf(level, format_str, std::forward(args)...); } +// Log only to stderr. This bypasses Logging / folly::Synchronized which +// might deadlock during exit. +template +void logf_stderr(fmt::string_view format_str, Args&&... args) { + auto msg = fmt::format(fmt::runtime(format_str), std::forward(args)...); + ignore_result(write(STDERR_FILENO, msg.data(), msg.size())); +} + #ifdef _WIN32 LONG WINAPI exception_filter(LPEXCEPTION_POINTERS excep); #endif diff --git a/watchman/SignalHandler.cpp b/watchman/SignalHandler.cpp index 8c88ec7513c7..d264af722d18 100644 --- a/watchman/SignalHandler.cpp +++ b/watchman/SignalHandler.cpp @@ -104,7 +104,7 @@ static void crash_handler(int signo, siginfo_t* si, void*) { } if (si) { - auto msg = fmt::format( + logf_stderr( "Terminating due to signal {} {} generated by pid={} uid={} {} ({})\n", signo, w_strsignal(signo), @@ -112,14 +112,12 @@ static void crash_handler(int signo, siginfo_t* si, void*) { si->si_uid, reason, uintptr_t(si->si_value.sival_ptr)); - ignore_result(write(STDERR_FILENO, msg.data(), msg.size())); } else { - auto msg = fmt::format( + logf_stderr( "Terminating due to signal {} {} {}\n", signo, w_strsignal(signo), reason); - ignore_result(write(STDERR_FILENO, msg.data(), msg.size())); } #if defined(HAVE_BACKTRACE) && defined(HAVE_BACKTRACE_SYMBOLS_FD) @@ -141,20 +139,21 @@ static void crash_handler(int signo, siginfo_t* si, void*) { #endif namespace { -void terminationHandler() { +[[noreturn]] void terminationHandler() { auto eptr = std::current_exception(); if (eptr) { try { std::rethrow_exception(eptr); } catch (const std::exception& exc) { - log(ABORT, "std::terminate was called. Exception: ", exc.what(), ".\n"); + logf_stderr("std::terminate was called. Exception: {}\n", exc.what()); } catch (...) { - log(ABORT, + logf_stderr( "std::terminate was called. Exception is not a std::exception.\n"); } } else { - log(ABORT, "std::terminate was called. There is no current exception.\n"); + logf_stderr("std::terminate was called. There is no current exception.\n"); } + std::abort(); } } // namespace diff --git a/watchman/main.cpp b/watchman/main.cpp index 9cdde7a29114..cba82b0abb86 100644 --- a/watchman/main.cpp +++ b/watchman/main.cpp @@ -1106,13 +1106,11 @@ int main(int argc, char** argv) { try { return inner_main(argc, argv); } catch (const std::exception& e) { - log(ERR, - "Uncaught C++ exception: ", - folly::exceptionStr(e).toStdString(), - "\n"); + logf_stderr( + "Uncaught C++ exception: {}\n", folly::exceptionStr(e).toStdString()); return 1; } catch (...) { - log(ERR, "Uncaught C++ exception: ...\n"); + logf_stderr("Uncaught C++ exception: ...\n"); return 1; } }