diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 892ce9788e..87cd9dd604 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -61,7 +61,7 @@ jobs: - uses: ./.github/actions/install-all-dependences - uses: ./.github/actions/init-make-config with: - options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma + options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma --with-debug-mutex - name: compile run: | make -j ${{env.proc_num}} @@ -76,7 +76,7 @@ jobs: export CC=gcc && export CXX=g++ mkdir build cd build - cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON .. + cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_MUTEX=ON .. - name: compile run: | cd build @@ -86,7 +86,7 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - - run: bazel test --verbose_failures --define with_mesalink=false --define with_glog=true --define with_thrift=true -- //... -//example/... + - run: bazel test --verbose_failures --define with_mesalink=false --define with_glog=true --define with_thrift=true --define with_debug_mutex=true -- //... -//example/... clang-compile-with-make: runs-on: ubuntu-20.04 @@ -135,7 +135,7 @@ jobs: - uses: ./.github/actions/install-all-dependences - uses: ./.github/actions/init-make-config with: - options: --cc=clang --cxx=clang++ --with-thrift --with-glog --with-rdma + options: --cc=clang --cxx=clang++ --with-thrift --with-glog --with-rdma --with-debug-mutex - name: compile run: | make -j ${{env.proc_num}} @@ -150,7 +150,7 @@ jobs: export CC=clang && export CXX=clang++ mkdir build cd build - cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON .. + cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_MUTEX=ON .. - name: compile run: | cd build @@ -160,7 +160,7 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - - run: bazel build --verbose_failures --action_env=CC=clang-12 --define with_mesalink=false --define with_glog=true --define with_thrift=true -- //... -//example/... + - run: bazel build --verbose_failures --action_env=CC=clang-12 --define with_mesalink=false --define with_glog=true --define with_thrift=true --define with_debug_mutex=true -- //... -//example/... clang-unittest: runs-on: ubuntu-20.04 diff --git a/BUILD.bazel b/BUILD.bazel index 795e392e70..3faa4d1229 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -45,7 +45,10 @@ COPTS = [ }) + select({ "//bazel/config:brpc_with_rdma": ["-DBRPC_WITH_RDMA=1"], "//conditions:default": [""], -}) +}) + select({ + "//bazel/config:brpc_with_debug_mutex": ["-DBRPC_DEBUG_MUTEX=1"], + "//conditions:default": ["-DBRPC_DEBUG_MUTEX=0"], + }) LINKOPTS = [ "-pthread", diff --git a/CMakeLists.txt b/CMakeLists.txt index d2a140a7a2..c7cae6df7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ option(WITH_DEBUG_SYMBOLS "With debug symbols" ON) option(WITH_THRIFT "With thrift framed protocol supported" OFF) option(WITH_SNAPPY "With snappy" OFF) option(WITH_RDMA "With RDMA" OFF) +option(WITH_DEBUG_MUTEX "With debugging mutex" OFF) option(BUILD_UNIT_TESTS "Whether to build unit tests" OFF) option(BUILD_FUZZ_TESTS "Whether to build fuzz tests" OFF) option(BUILD_BRPC_TOOLS "Whether to build brpc tools" ON) @@ -66,6 +67,11 @@ if(WITH_DEBUG_SYMBOLS) set(DEBUG_SYMBOL "-g") endif() +set(WITH_DEBUG_MUTEX_VAL "0") +if(WITH_DEBUG_MUTEX) + set(WITH_DEBUG_MUTEX_VAL "1") +endif() + if(WITH_THRIFT) set(THRIFT_CPP_FLAG "-DENABLE_THRIFT_FRAMED_PROTOCOL") find_library(THRIFT_LIB NAMES thrift) @@ -117,7 +123,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -Wno-deprecated-declarations -Wno-inconsistent-missing-override") endif() -set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DGFLAGS_NS=${GFLAGS_NS}") +set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DGFLAGS_NS=${GFLAGS_NS} -DBRPC_DEBUG_MUTEX=${WITH_DEBUG_MUTEX_VAL}") if(WITH_MESALINK) set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -DUSE_MESALINK") endif() diff --git a/bazel/config/BUILD.bazel b/bazel/config/BUILD.bazel index bed04d3b57..20e40b5120 100644 --- a/bazel/config/BUILD.bazel +++ b/bazel/config/BUILD.bazel @@ -108,4 +108,10 @@ config_setting( name = "brpc_with_boringssl", define_values = {"BRPC_WITH_BORINGSSL": "true"}, visibility = ["//visibility:public"], +) + +config_setting( + name = "brpc_with_debug_mutex", + define_values = {"with_debug_mutex": "true"}, + visibility = ["//visibility:public"], ) \ No newline at end of file diff --git a/config_brpc.sh b/config_brpc.sh index ddaa6daa65..9024d71f43 100755 --- a/config_brpc.sh +++ b/config_brpc.sh @@ -38,12 +38,13 @@ else LDD=ldd fi -TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,nodebugsymbols -n 'config_brpc' -- "$@"` +TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,nodebugsymbols,with-debug-mutex -n 'config_brpc' -- "$@"` WITH_GLOG=0 WITH_THRIFT=0 WITH_RDMA=0 WITH_MESALINK=0 DEBUGSYMBOLS=-g +BRPC_DEBUG_MUTEX=0 if [ $? != 0 ] ; then >&2 $ECHO "Terminating..."; exit 1 ; fi @@ -68,6 +69,7 @@ while true; do --with-rdma) WITH_RDMA=1; shift 1 ;; --with-mesalink) WITH_MESALINK=1; shift 1 ;; --nodebugsymbols ) DEBUGSYMBOLS=; shift 1 ;; + --with-debug-mutex ) BRPC_DEBUG_MUTEX=1; shift 1 ;; -- ) shift; break ;; * ) break ;; esac @@ -405,7 +407,7 @@ append_to_output "STATIC_LINKINGS=$STATIC_LINKINGS" append_to_output "DYNAMIC_LINKINGS=$DYNAMIC_LINKINGS" # CPP means C PreProcessing, not C PlusPlus -CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS" +CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS -DBRPC_DEBUG_MUTEX=$BRPC_DEBUG_MUTEX" # Avoid over-optimizations of TLS variables by GCC>=4.8 # See: https://github.com/apache/brpc/issues/1693 diff --git a/src/bthread/mutex.cpp b/src/bthread/mutex.cpp index 403f6bb8a8..54a0ffbd6e 100644 --- a/src/bthread/mutex.cpp +++ b/src/bthread/mutex.cpp @@ -777,10 +777,58 @@ const MutexInternal MUTEX_LOCKED_RAW = {{1},{0},0}; BAIDU_CASSERT(sizeof(unsigned) == sizeof(MutexInternal), sizeof_mutex_internal_must_equal_unsigned); +#if BRPC_DEBUG_MUTEX +#define BTHREAD_MUTEX_RESET_OWNER \ + ((butil::atomic*)&m->owner.type) \ + ->store(OWNER_TYPE_NONE, butil::memory_order_relaxed) + +#define BTHREAD_MUTEX_SET_OWNER \ + do { \ + TaskGroup* task_group = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group); \ + if (NULL != task_group && !task_group->is_current_main_task()) { \ + m->owner.id = bthread_self(); \ + ((butil::atomic*)&m->owner.type) \ + ->store(OWNER_TYPE_BTHREAD, butil::memory_order_release); \ + } else { \ + m->owner.id = pthread_numeric_id(); \ + ((butil::atomic*)&m->owner.type) \ + ->store(OWNER_TYPE_PTHREAD, butil::memory_order_release); \ + } \ + } while(false) + +// Check if the mutex has been locked by the current thread. +// Double lock on the same thread will cause deadlock. +#define BTHREAD_MUTEX_CHECK_OWNER \ + mutex_owner_type owner_type = ((butil::atomic*)&m->owner.type) \ + ->load(butil::memory_order_acquire); \ + bool double_lock = \ + (owner_type == OWNER_TYPE_BTHREAD && m->owner.id == bthread_self()) || \ + (owner_type == OWNER_TYPE_PTHREAD && m->owner.id == pthread_numeric_id()); \ + if (double_lock) { \ + butil::debug::StackTrace trace(true); \ + LOG(ERROR) << "Detected deadlock caused by double lock of bthread_mutex_t:" \ + << std::endl << trace.ToString(); \ + } +#else +#define BTHREAD_MUTEX_RESET_OWNER ((void)0) +#define BTHREAD_MUTEX_SET_OWNER ((void)0) +#define BTHREAD_MUTEX_CHECK_OWNER ((void)0) +#endif // BRPC_DEBUG_MUTEX + +inline int mutex_trylock_impl(bthread_mutex_t* m) { + MutexInternal* split = (MutexInternal*)m->butex; + if (!split->locked.exchange(1, butil::memory_order_acquire)) { + BTHREAD_MUTEX_SET_OWNER; + return 0; + } + return EBUSY; +} + const int MAX_SPIN_ITER = 4; -inline int mutex_lock_contended_impl( - bthread_mutex_t* m, const struct timespec* __restrict abstime) { +inline int mutex_lock_contended_impl(bthread_mutex_t* __restrict m, + const struct timespec* __restrict abstime) { + BTHREAD_MUTEX_CHECK_OWNER; // When a bthread first contends for a lock, active spinning makes sense. // Spin only few times and only if local `rq' is empty. TaskGroup* g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group); @@ -813,12 +861,43 @@ inline int mutex_lock_contended_impl( queue_lifo = true; } } + BTHREAD_MUTEX_SET_OWNER; return 0; } #ifdef BTHREAD_USE_FAST_PTHREAD_MUTEX namespace internal { +#if BRPC_DEBUG_MUTEX +#define FAST_PTHREAD_MUTEX_RESET_OWNER \ + ((butil::atomic*)&_owner.type) \ + ->store(OWNER_TYPE_NONE, butil::memory_order_relaxed) + +#define FAST_PTHREAD_MUTEX_SET_OWNER \ + _owner.id = pthread_numeric_id(); \ + ((butil::atomic*)&_owner.type) \ + ->store(OWNER_TYPE_PTHREAD,butil::memory_order_release) + +// Check if the mutex has been locked by the current thread. +// Double lock on the same thread will cause deadlock. +#define FAST_PTHREAD_MUTEX_CHECK_OWNER \ + mutex_owner_type owner_type = ((butil::atomic*)&_owner.type) \ + ->load(butil::memory_order_acquire); \ + if (owner_type == OWNER_TYPE_PTHREAD && _owner.id == pthread_numeric_id()) { \ + butil::debug::StackTrace trace(true); \ + LOG(ERROR) << "Detected deadlock caused by double lock of FastPthreadMutex:" \ + << std::endl << trace.ToString(); \ + } +#else +#define FAST_PTHREAD_MUTEX_RESET_OWNER ((void)0) +#define FAST_PTHREAD_MUTEX_SET_OWNER ((void)0) +#define FAST_PTHREAD_MUTEX_CHECK_OWNER ((void)0) +#endif // BRPC_DEBUG_MUTEX + +FastPthreadMutex::FastPthreadMutex() : _futex(0) { + FAST_PTHREAD_MUTEX_RESET_OWNER; +} + int FastPthreadMutex::lock_contended() { butil::atomic* whole = (butil::atomic*)&_futex; while (whole->exchange(BTHREAD_MUTEX_CONTENDED) & BTHREAD_MUTEX_LOCKED) { @@ -831,10 +910,12 @@ int FastPthreadMutex::lock_contended() { } void FastPthreadMutex::lock() { + FAST_PTHREAD_MUTEX_CHECK_OWNER; auto split = (bthread::MutexInternal*)&_futex; if (split->locked.exchange(1, butil::memory_order_acquire)) { (void)lock_contended(); } + FAST_PTHREAD_MUTEX_SET_OWNER; ++tls_pthread_lock_count; } @@ -842,12 +923,14 @@ bool FastPthreadMutex::try_lock() { auto split = (bthread::MutexInternal*)&_futex; bool lock = !split->locked.exchange(1, butil::memory_order_acquire); if (lock) { + FAST_PTHREAD_MUTEX_SET_OWNER; ++tls_pthread_lock_count; } return lock; } void FastPthreadMutex::unlock() { + FAST_PTHREAD_MUTEX_RESET_OWNER; auto whole = (butil::atomic*)&_futex; const unsigned prev = whole->exchange(0, butil::memory_order_release); // CAUTION: the mutex may be destroyed, check comments before butex_create @@ -857,6 +940,10 @@ void FastPthreadMutex::unlock() { --tls_pthread_lock_count; } +#undef FAST_PTHREAD_MUTEX_RESET_OWNER +#undef FAST_PTHREAD_MUTEX_SET_OWNER +#undef FAST_PTHREAD_MUTEX_CHECK_OWNER + } // namespace internal #endif // BTHREAD_USE_FAST_PTHREAD_MUTEX @@ -875,6 +962,7 @@ extern "C" { int bthread_mutex_init(bthread_mutex_t* __restrict m, const bthread_mutexattr_t* __restrict) { bthread::make_contention_site_invalid(&m->csite); + BTHREAD_MUTEX_RESET_OWNER; m->butex = bthread::butex_create_checked(); if (!m->butex) { return ENOMEM; @@ -889,11 +977,7 @@ int bthread_mutex_destroy(bthread_mutex_t* m) { } int bthread_mutex_trylock(bthread_mutex_t* m) { - bthread::MutexInternal* split = (bthread::MutexInternal*)m->butex; - if (!split->locked.exchange(1, butil::memory_order_acquire)) { - return 0; - } - return EBUSY; + return bthread::mutex_trylock_impl(m); } int bthread_mutex_lock_contended(bthread_mutex_t* m) { @@ -901,8 +985,7 @@ int bthread_mutex_lock_contended(bthread_mutex_t* m) { } int bthread_mutex_lock(bthread_mutex_t* m) { - bthread::MutexInternal* split = (bthread::MutexInternal*)m->butex; - if (!split->locked.exchange(1, butil::memory_order_acquire)) { + if (0 == bthread::mutex_trylock_impl(m)) { return 0; } // Don't sample when contention profiler is off. @@ -965,6 +1048,7 @@ int bthread_mutex_unlock(bthread_mutex_t* m) { saved_csite = m->csite; bthread::make_contention_site_invalid(&m->csite); } + BTHREAD_MUTEX_RESET_OWNER; const unsigned prev = whole->exchange(0, butil::memory_order_release); // CAUTION: the mutex may be destroyed, check comments before butex_create if (prev == BTHREAD_MUTEX_LOCKED) { @@ -982,6 +1066,9 @@ int bthread_mutex_unlock(bthread_mutex_t* m) { bthread::submit_contention(saved_csite, unlock_end_ns); return 0; } +#undef BTHREAD_MUTEX_RESET_OWNER +#undef BTHREAD_MUTEX_SET_OWNER +#undef BTHREAD_MUTEX_CHECK_OWNER #ifndef NO_PTHREAD_MUTEX_HOOK int pthread_mutex_lock(pthread_mutex_t* __mutex) { diff --git a/src/bthread/mutex.h b/src/bthread/mutex.h index ad6d2e5cbd..c59f3be834 100644 --- a/src/bthread/mutex.h +++ b/src/bthread/mutex.h @@ -35,6 +35,7 @@ extern int bthread_mutex_lock(bthread_mutex_t* mutex); extern int bthread_mutex_timedlock(bthread_mutex_t* __restrict mutex, const struct timespec* __restrict abstime); extern int bthread_mutex_unlock(bthread_mutex_t* mutex); +extern bthread_t bthread_self(void); __END_DECLS namespace bthread { @@ -71,14 +72,17 @@ namespace internal { #ifdef BTHREAD_USE_FAST_PTHREAD_MUTEX class FastPthreadMutex { public: - FastPthreadMutex() : _futex(0) {} - ~FastPthreadMutex() = default; + FastPthreadMutex(); void lock(); void unlock(); bool try_lock(); private: DISALLOW_COPY_AND_ASSIGN(FastPthreadMutex); int lock_contended(); + // Note: Owner detection of the mutex comes with average execution + // slowdown of about 50%., so it is only used for debugging and is + // only available when the `BRPC_DEBUG_MUTEX' macro is defined. + mutex_owner_t _owner; unsigned _futex; }; #else diff --git a/src/bthread/types.h b/src/bthread/types.h index 0aad64c403..bfeda87c7a 100644 --- a/src/bthread/types.h +++ b/src/bthread/types.h @@ -165,6 +165,17 @@ typedef struct { size_t sampling_range; } bthread_contention_site_t; +enum mutex_owner_type { + OWNER_TYPE_NONE = 0, + OWNER_TYPE_BTHREAD, + OWNER_TYPE_PTHREAD +}; + +struct mutex_owner_t { + mutex_owner_type type; + uint64_t id; +}; + typedef struct bthread_mutex_t { #if defined(__cplusplus) bthread_mutex_t() : butex(NULL), csite{} {} @@ -172,6 +183,10 @@ typedef struct bthread_mutex_t { #endif unsigned* butex; bthread_contention_site_t csite; + // Note: Owner detection of the mutex comes with average execution + // slowdown of about 50%., so it is only used for debugging and is + // only available when the `BRPC_DEBUG_MUTEX' macro is defined. + mutex_owner_t owner; } bthread_mutex_t; typedef struct { diff --git a/test/bthread_mutex_unittest.cpp b/test/bthread_mutex_unittest.cpp index 21bd60446f..6d42822bc8 100644 --- a/test/bthread_mutex_unittest.cpp +++ b/test/bthread_mutex_unittest.cpp @@ -224,6 +224,11 @@ TEST(MutexTest, performance) { butil::Mutex base_mutex; PerfTest(&base_mutex, (pthread_t*)NULL, thread_num, pthread_create, pthread_join); PerfTest(&base_mutex, (bthread_t*)NULL, thread_num, bthread_start_background, bthread_join); + + bthread::FastPthreadMutex fast_mutex; + PerfTest(&fast_mutex, (pthread_t*)NULL, thread_num, pthread_create, pthread_join); + PerfTest(&fast_mutex, (bthread_t*)NULL, thread_num, bthread_start_background, bthread_join); + bthread::Mutex bth_mutex; PerfTest(&bth_mutex, (pthread_t*)NULL, thread_num, pthread_create, pthread_join); PerfTest(&bth_mutex, (bthread_t*)NULL, thread_num, bthread_start_background, bthread_join);