Skip to content

Commit

Permalink
change logging in exit handlers to not avoid locking
Browse files Browse the repository at this point in the history
Summary:
We have seen deadlock running `terminationHandler` -> `hasSubscribers` in 2 threads.
It's unclear which other thread is holding the lock.

To make things easier to debug next time, let's change terminationHandler (and
also main.cpp) to bypass the logging lock and write to stderr directly.

Related threads (all threads in P536343453):

  Thread 11 (LWP 3275661):
  #0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
  #1  0x0000000001cc995b in folly::detail::(anonymous namespace)::nativeFutexWaitImpl (addr=<optimized out>, expected=<optimized out>, absSystemTime=<optimized out>, absSteadyTime=<optimized out>, waitMask=<optimized out>) at fbcode/folly/detail/Futex.cpp:126
  #2  folly::detail::futexWaitImpl (futex=0x89, futex@entry=0x7f1c3ac2ef90, expected=994748889, absSystemTime=absSystemTime@entry=0x0, absSteadyTime=<optimized out>, absSteadyTime@entry=0x0, waitMask=waitMask@entry=1) at fbcode/folly/detail/Futex.cpp:254
  #3  0x0000000001d34bce in folly::detail::futexWait<std::atomic<unsigned int> > (futex=0x7f1c3ac2ef90, expected=137, waitMask=1) at buck-out/v2/gen/fbcode/110b607930331a92/folly/detail/__futex__/headers/folly/detail/Futex-inl.h:96
  #4  folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever::doWait (this=<optimized out>, futex=..., expected=137, waitMask=1) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:718
  #5  folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::futexWaitForZeroBits<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7f1c149f88e4: 118379409, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1184
  #6  0x0000000001cd42b2 in folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::yieldWaitForZeroBits<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7f1c149f88e4: 118379409, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1151
  #7  folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::waitForZeroBits<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7f1c149f88e4: 118379409, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1109
  #8  0x0000000001e7e14c in folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::lockSharedImpl<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7f1c149f88e4: 118379409, token=0x0, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1664
  #9  folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::lockSharedImpl<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, token=0x0, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1356
  #10 folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::lock_shared (this=0x7f1c3ac2ef90) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:495
  #11 std::shared_lock<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> >::shared_lock (this=<optimized out>, __m=...) at fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/shared_mutex:727
  #12 0x0000000002d765fd in folly::LockedPtr<folly::Synchronized<watchman::Publisher::state, folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> > const, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0> >::doLock<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>, std::shared_lock<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> >, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0>, 0> (mutex=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:1493
  #13 folly::LockedPtr<folly::Synchronized<watchman::Publisher::state, folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> > const, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0> >::LockedPtr (this=0x7f1c149f8928, parent=<optimized out>) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:1272
  #14 folly::SynchronizedBase<folly::Synchronized<watchman::Publisher::state, folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> >, (folly::detail::SynchronizedMutexLevel)2>::rlock (this=<optimized out>) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:229
  #15 watchman::Publisher::hasSubscribers (this=<optimized out>) at fbcode/watchman/PubSub.cpp:117
  #16 0x0000000002eca798 in watchman::Log::log<char const (&) [39], char const*, char const (&) [3]> (this=<optimized out>, level=level@entry=watchman::ABORT, args=..., args=..., args=...) at buck-out/v2/gen/fbcode/110b607930331a92/watchman/__logging__/headers/watchman/Logging.h:42
  #17 0x0000000002ec9ba7 in watchman::log<char const (&) [39], char const*, char const (&) [3]> (level=watchman::ABORT, args=..., args=..., args=...) at buck-out/v2/gen/fbcode/110b607930331a92/watchman/__logging__/headers/watchman/Logging.h:121
  #18 (anonymous namespace)::terminationHandler () at fbcode/watchman/SignalHandler.cpp:159
  #19 0x00007f1c3b0c7b3a in __cxxabiv1::__terminate (handler=<optimized out>) at ../../.././libstdc++-v3/libsupc++/eh_terminate.cc:48
  #20 0x00007f1c3b0c7ba5 in std::terminate () at ../../.././libstdc++-v3/libsupc++/eh_terminate.cc:58
  #21 0x0000000001c38c8b in __clang_call_terminate ()
  #22 0x0000000003284c9e in folly::detail::terminate_with_<std::runtime_error, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&> (args=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/lang/__exception__/headers/folly/lang/Exception.h:93
  #23 0x0000000003281bae in folly::terminate_with<std::runtime_error, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&> (args=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/lang/__exception__/headers/folly/lang/Exception.h:123
  #24 folly::SingletonVault::fireShutdownTimer (this=<optimized out>) at fbcode/folly/Singleton.cpp:499
  #25 0x0000000003281ad9 in folly::(anonymous namespace)::fireShutdownSignalHelper (sigval=...) at fbcode/folly/Singleton.cpp:454
  #26 0x00007f1c3b42b939 in timer_sigev_thread (arg=<optimized out>) at ../sysdeps/unix/sysv/linux/timer_routines.c:55
  #27 0x00007f1c3b41fc0f in start_thread (arg=<optimized out>) at pthread_create.c:434
  #28 0x00007f1c3b4b21dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

  ...

  Thread 1 (LWP 3201992):
  #0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
  #1  0x0000000001cc995b in folly::detail::(anonymous namespace)::nativeFutexWaitImpl (addr=<optimized out>, expected=<optimized out>, absSystemTime=<optimized out>, absSteadyTime=<optimized out>, waitMask=<optimized out>) at fbcode/folly/detail/Futex.cpp:126
  #2  folly::detail::futexWaitImpl (futex=0x89, futex@entry=0x7f1c3ac2ef90, expected=994748889, absSystemTime=absSystemTime@entry=0x0, absSteadyTime=<optimized out>, absSteadyTime@entry=0x0, waitMask=waitMask@entry=1) at fbcode/folly/detail/Futex.cpp:254
  #3  0x0000000001d34bce in folly::detail::futexWait<std::atomic<unsigned int> > (futex=0x7f1c3ac2ef90, expected=137, waitMask=1) at buck-out/v2/gen/fbcode/110b607930331a92/folly/detail/__futex__/headers/folly/detail/Futex-inl.h:96
  #4  folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever::doWait (this=<optimized out>, futex=..., expected=137, waitMask=1) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:718
  #5  folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::futexWaitForZeroBits<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7ffd2d5be924: 118379408, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1184
  #6  0x0000000001cd42b2 in folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::yieldWaitForZeroBits<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7ffd2d5be924: 118379408, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1151
  #7  folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::waitForZeroBits<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7ffd2d5be924: 118379408, goal=128, waitMask=1, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1109
  #8  0x0000000001e7e14c in folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::lockSharedImpl<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, state=@0x7ffd2d5be924: 118379408, token=0x0, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1664
  #9  folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::lockSharedImpl<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::WaitForever> (this=0x7f1c3ac2ef90, token=0x0, ctx=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:1356
  #10 folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>::lock_shared (this=0x7f1c3ac2ef90) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__shared_mutex__/headers/folly/SharedMutex.h:495
  #11 std::shared_lock<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> >::shared_lock (this=<optimized out>, __m=...) at fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/shared_mutex:727
  #12 0x0000000002d765fd in folly::LockedPtr<folly::Synchronized<watchman::Publisher::state, folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> > const, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0> >::doLock<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault>, std::shared_lock<folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> >, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0>, 0> (mutex=...) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:1493
  #13 folly::LockedPtr<folly::Synchronized<watchman::Publisher::state, folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> > const, folly::detail::SynchronizedLockPolicy<(folly::detail::SynchronizedMutexLevel)2, (folly::detail::SynchronizedMutexMethod)0> >::LockedPtr (this=0x7ffd2d5be968, parent=<optimized out>) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:1272
  #14 folly::SynchronizedBase<folly::Synchronized<watchman::Publisher::state, folly::SharedMutexImpl<false, void, std::atomic, folly::SharedMutexPolicyDefault> >, (folly::detail::SynchronizedMutexLevel)2>::rlock (this=<optimized out>) at buck-out/v2/gen/fbcode/110b607930331a92/folly/__synchronized__/headers/folly/Synchronized.h:229
  #15 watchman::Publisher::hasSubscribers (this=<optimized out>) at fbcode/watchman/PubSub.cpp:117
  #16 0x0000000002ecac20 in watchman::Log::log<char const (&) [59]> (this=<optimized out>, level=level@entry=watchman::ABORT, args=...) at buck-out/v2/gen/fbcode/110b607930331a92/watchman/__logging__/headers/watchman/Logging.h:42
  #17 0x0000000002ec9b24 in watchman::log<char const (&) [59]> (level=watchman::ABORT, args=...) at buck-out/v2/gen/fbcode/110b607930331a92/watchman/__logging__/headers/watchman/Logging.h:121
  #18 (anonymous namespace)::terminationHandler () at fbcode/watchman/SignalHandler.cpp:165
  #19 0x00007f1c3b0c7b3a in __cxxabiv1::__terminate (handler=<optimized out>) at ../../.././libstdc++-v3/libsupc++/eh_terminate.cc:48
  #20 0x00007f1c3b0c7ba5 in std::terminate () at ../../.././libstdc++-v3/libsupc++/eh_terminate.cc:58
  #21 0x0000000002d8cde1 in std::thread::~thread (this=0x7f1c3ac2ef90) at fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/std_thread.h:152
  #22 0x00007f1c3b3cc8f8 in __run_exit_handlers (status=1, listp=0x7f1c3b598658 <__exit_funcs>, run_list_atexit=<optimized out>, run_dtors=<optimized out>) at exit.c:113
  #23 0x00007f1c3b3cca0a in __GI_exit (status=<optimized out>) at exit.c:143
  #24 0x00007f1c3b3b165e in __libc_start_call_main (main=0x2d11220 <main(int, char**)>, argc=2, argv=0x7ffd2d5bec78) at ../sysdeps/nptl/libc_start_call_main.h:74
  #25 0x00007f1c3b3b1718 in __libc_start_main_impl (main=0x2d11220 <main(int, char**)>, argc=2, argv=0x7ffd2d5bec78, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffd2d5bec68) at ../csu/libc-start.c:409
  #26 0x0000000002d0e181 in _start () at ../sysdeps/x86_64/start.S:116

Reviewed By: xavierd

Differential Revision: D40166374

fbshipit-source-id: 7017e20234e5e0a9532eb61a63ac49ac0020d443
  • Loading branch information
quark-zju authored and facebook-github-bot committed Oct 7, 2022
1 parent 4f8e3b4 commit 152a0ad
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 13 deletions.
8 changes: 8 additions & 0 deletions watchman/Logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ void logf(LogLevel level, fmt::string_view format_str, Args&&... args) {
getLog().logf(level, format_str, std::forward<Args>(args)...);
}

// Log only to stderr. This bypasses Logging / folly::Synchronized which
// might deadlock during exit.
template <typename... Args>
void logf_stderr(fmt::string_view format_str, Args&&... args) {
auto msg = fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
ignore_result(write(STDERR_FILENO, msg.data(), msg.size()));
}

#ifdef _WIN32
LONG WINAPI exception_filter(LPEXCEPTION_POINTERS excep);
#endif
Expand Down
15 changes: 7 additions & 8 deletions watchman/SignalHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,22 +104,20 @@ static void crash_handler(int signo, siginfo_t* si, void*) {
}

if (si) {
auto msg = fmt::format(
logf_stderr(
"Terminating due to signal {} {} generated by pid={} uid={} {} ({})\n",
signo,
w_strsignal(signo),
si->si_pid,
si->si_uid,
reason,
uintptr_t(si->si_value.sival_ptr));
ignore_result(write(STDERR_FILENO, msg.data(), msg.size()));
} else {
auto msg = fmt::format(
logf_stderr(
"Terminating due to signal {} {} {}\n",
signo,
w_strsignal(signo),
reason);
ignore_result(write(STDERR_FILENO, msg.data(), msg.size()));
}

#if defined(HAVE_BACKTRACE) && defined(HAVE_BACKTRACE_SYMBOLS_FD)
Expand All @@ -141,20 +139,21 @@ static void crash_handler(int signo, siginfo_t* si, void*) {
#endif

namespace {
void terminationHandler() {
[[noreturn]] void terminationHandler() {
auto eptr = std::current_exception();
if (eptr) {
try {
std::rethrow_exception(eptr);
} catch (const std::exception& exc) {
log(ABORT, "std::terminate was called. Exception: ", exc.what(), ".\n");
logf_stderr("std::terminate was called. Exception: {}\n", exc.what());
} catch (...) {
log(ABORT,
logf_stderr(
"std::terminate was called. Exception is not a std::exception.\n");
}
} else {
log(ABORT, "std::terminate was called. There is no current exception.\n");
logf_stderr("std::terminate was called. There is no current exception.\n");
}
std::abort();
}
} // namespace

Expand Down
8 changes: 3 additions & 5 deletions watchman/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1106,13 +1106,11 @@ int main(int argc, char** argv) {
try {
return inner_main(argc, argv);
} catch (const std::exception& e) {
log(ERR,
"Uncaught C++ exception: ",
folly::exceptionStr(e).toStdString(),
"\n");
logf_stderr(
"Uncaught C++ exception: {}\n", folly::exceptionStr(e).toStdString());
return 1;
} catch (...) {
log(ERR, "Uncaught C++ exception: ...\n");
logf_stderr("Uncaught C++ exception: ...\n");
return 1;
}
}
Expand Down

0 comments on commit 152a0ad

Please sign in to comment.