From c936f06a6251175d32690c8757c2d4e743b3e483 Mon Sep 17 00:00:00 2001 From: Bright Chen Date: Mon, 16 Dec 2024 21:00:24 +0800 Subject: [PATCH] Support task tracer --- .../actions/init-ut-make-config/action.yml | 13 + .../install-all-dependences/action.yml | 2 +- .github/workflows/ci-linux.yml | 46 +- CMakeLists.txt | 20 + config_brpc.sh | 29 +- docs/cn/bthread_tracer.md | 69 +++ docs/cn/getting_started.md | 10 +- docs/en/getting_started.md | 8 +- docs/images/bthread_status_model.svg | 3 + docs/images/bthread_stb_model.svg | 3 + src/brpc/builtin/bthreads_service.cpp | 18 +- src/bthread/bthread.cpp | 75 +++ src/bthread/bthread.h | 21 +- src/bthread/butex.cpp | 33 +- src/bthread/execution_queue.cpp | 4 +- src/bthread/execution_queue_inl.h | 12 +- src/bthread/task_control.cpp | 45 +- src/bthread/task_control.h | 20 +- src/bthread/task_group.cpp | 114 ++-- src/bthread/task_group.h | 17 +- src/bthread/task_group_inl.h | 8 +- src/bthread/task_meta.h | 57 +- src/bthread/task_tracer.cpp | 491 ++++++++++++++++++ src/bthread/task_tracer.h | 134 +++++ src/butil/debug/stack_trace.cc | 13 +- src/butil/debug/stack_trace.h | 1 + src/butil/debug/stack_trace_posix.cc | 22 + test/brpc_builtin_service_unittest.cpp | 31 +- test/bthread_unittest.cpp | 41 +- 29 files changed, 1219 insertions(+), 141 deletions(-) create mode 100644 .github/actions/init-ut-make-config/action.yml create mode 100644 docs/cn/bthread_tracer.md create mode 100644 docs/images/bthread_status_model.svg create mode 100644 docs/images/bthread_stb_model.svg create mode 100644 src/bthread/task_tracer.cpp create mode 100644 src/bthread/task_tracer.h diff --git a/.github/actions/init-ut-make-config/action.yml b/.github/actions/init-ut-make-config/action.yml new file mode 100644 index 0000000000..b13800ed98 --- /dev/null +++ b/.github/actions/init-ut-make-config/action.yml @@ -0,0 +1,13 @@ +inputs: + options: + description: extra options for config_brpc.sh + required: false +runs: + using: "composite" + steps: + - run: 
sudo git clone https://github.com/libunwind/libunwind.git && cd libunwind && sudo git checkout tags/v1.8.1 && sudo mkdir -p /libunwind && sudo autoreconf -i && sudo CC=clang CXX=clang++ ./configure --prefix=/libunwind && sudo make -j ${{env.proc_num}} && sudo make install + shell: bash + - run: sudo apt-get update && sudo apt-get install -y libgtest-dev cmake gdb libstdc++6-9-dbg && cd /usr/src/gtest && sudo cmake . && sudo make -j ${{env.proc_num}} && sudo mv lib/libgtest* /usr/lib/ + shell: bash + - run: sh config_brpc.sh --headers="/libunwind/include /usr/include" --libs="/libunwind/lib /usr/lib /usr/lib64" --nodebugsymbols ${{inputs.options}} + shell: bash diff --git a/.github/actions/install-all-dependences/action.yml b/.github/actions/install-all-dependences/action.yml index f507aa4070..8ff502f81a 100644 --- a/.github/actions/install-all-dependences/action.yml +++ b/.github/actions/install-all-dependences/action.yml @@ -2,7 +2,7 @@ runs: using: "composite" steps: - uses: ./.github/actions/install-essential-dependences - - run: sudo apt-get install -y libgoogle-glog-dev automake bison flex libboost-all-dev libevent-dev libtool pkg-config libibverbs1 libibverbs-dev + - run: sudo apt-get install -y libgoogle-glog-dev automake bison flex libboost-all-dev libevent-dev libtool pkg-config libibverbs1 libibverbs-dev libunwind8-dev shell: bash - run: wget https://archive.apache.org/dist/thrift/0.11.0/thrift-0.11.0.tar.gz && tar -xf thrift-0.11.0.tar.gz shell: bash diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 14961ebb3b..1854483a42 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -31,7 +31,7 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/install-essential-dependences - - name: cmake + - name: cmake run: | export CC=gcc && export CXX=g++ mkdir build @@ -47,7 +47,7 @@ jobs: steps: - uses: actions/checkout@v2 - run: bazel test --verbose_failures -- //... -//example/... 
- + gcc-compile-with-boringssl: runs-on: ubuntu-20.04 steps: @@ -61,7 +61,7 @@ jobs: - uses: ./.github/actions/install-all-dependences - uses: ./.github/actions/init-make-config with: - options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma --with-debug-bthread-sche-safety --with-debug-lock + options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma --with-debug-bthread-sche-safety --with-debug-lock --with-bthread-tracer - name: compile run: | make -j ${{env.proc_num}} @@ -76,7 +76,7 @@ jobs: export CC=gcc && export CXX=g++ mkdir build cd build - cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON .. + cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON -WITH_BTHREAD_TRACER=ON .. - name: compile run: | cd build @@ -105,7 +105,7 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/install-essential-dependences - - name: cmake + - name: cmake run: | export CC=clang && export CXX=clang++ mkdir build @@ -135,7 +135,7 @@ jobs: - uses: ./.github/actions/install-all-dependences - uses: ./.github/actions/init-make-config with: - options: --cc=clang --cxx=clang++ --with-thrift --with-glog --with-rdma --with-debug-bthread-sche-safety --with-debug-lock + options: --cc=clang --cxx=clang++ --with-thrift --with-glog --with-rdma --with-debug-bthread-sche-safety --with-debug-lock --with-bthread-tracer - name: compile run: | make -j ${{env.proc_num}} @@ -150,7 +150,7 @@ jobs: export CC=clang && export CXX=clang++ mkdir build cd build - cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON .. + cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON -WITH_BTHREAD_TRACER=ON .. 
- name: compile run: | cd build @@ -165,21 +165,17 @@ jobs: clang-unittest: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 - - uses: ./.github/actions/install-essential-dependences - - name: install gtest - run: | - sudo apt-get update - sudo apt-get install -y cmake libgtest-dev gdb - cd /usr/src/gtest && sudo cmake . && sudo make && sudo mv lib/libgtest* /usr/lib/ - - uses: ./.github/actions/init-make-config - with: - options: --cc=clang --cxx=clang++ - - name: compile tests - run: | - cd test - make -j ${{env.proc_num}} - - name: run tests - run: | - cd test - sh ./run_tests.sh + - uses: actions/checkout@v2 + - uses: ./.github/actions/install-essential-dependences + - uses: ./.github/actions/init-ut-make-config + with: + options: --cc=clang --cxx=clang++ --with-bthread-tracer + - name: compile tests + run: | + cat config.mk + cd test + make -j ${{env.proc_num}} + - name: run tests + run: | + cd test + sh ./run_tests.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index f3688766bb..a6b7716b29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ option(WITH_BORINGSSL "With BoringSSL" OFF) option(DEBUG "Print debug logs" OFF) option(WITH_DEBUG_SYMBOLS "With debug symbols" ON) option(WITH_THRIFT "With thrift framed protocol supported" OFF) +option(WITH_BTHREAD_TRACER "With bthread tracer supported" OFF) option(WITH_SNAPPY "With snappy" OFF) option(WITH_RDMA "With RDMA" OFF) option(WITH_DEBUG_BTHREAD_SCHE_SAFETY "With debugging bthread sche safety" OFF) @@ -81,6 +82,20 @@ if(WITH_THRIFT) endif() endif() +if (WITH_BTHREAD_TRACER) + if (NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux") OR NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")) + message(FATAL_ERROR "bthread tracer is only supported on Linux x86_64 platform") + endif() + find_path(LIBUNWIND_INCLUDE_PATH NAMES libunwind.h) + find_library(LIBUNWIND_LIB NAMES unwind) + find_library(LIBUNWIND_X86_64_LIB NAMES unwind-x86_64) + if (NOT LIBUNWIND_INCLUDE_PATH OR NOT LIBUNWIND_LIB) + message(FATAL_ERROR 
"Fail to find libunwind, which is needed by bthread tracer") + endif() + add_definitions(-DBRPC_BTHREAD_TRACER) + include_directories(${LIBUNWIND_INCLUDE_PATH}) +endif () + set(WITH_RDMA_VAL "0") if(WITH_RDMA) set(WITH_RDMA_VAL "1") @@ -319,6 +334,11 @@ if(WITH_SNAPPY) set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lsnappy") endif() +if (WITH_BTHREAD_TRACER) + set(DYNAMIC_LIB ${DYNAMIC_LIB} ${LIBUNWIND_LIB} ${LIBUNWIND_X86_64_LIB}) + set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lunwind -lunwind-x86_64") +endif() + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") set(DYNAMIC_LIB ${DYNAMIC_LIB} rt) set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lrt") diff --git a/config_brpc.sh b/config_brpc.sh index fa607292de..9b4536608d 100755 --- a/config_brpc.sh +++ b/config_brpc.sh @@ -38,11 +38,12 @@ else LDD=ldd fi -TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,with-debug-bthread-sche-safety,with-debug-lock,nodebugsymbols -n 'config_brpc' -- "$@"` +TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,with-bthread-tracer,with-debug-bthread-sche-safety,with-debug-lock,nodebugsymbols -n 'config_brpc' -- "$@"` WITH_GLOG=0 WITH_THRIFT=0 WITH_RDMA=0 WITH_MESALINK=0 +WITH_BTHREAD_TRACER=0 BRPC_DEBUG_BTHREAD_SCHE_SAFETY=0 DEBUGSYMBOLS=-g BRPC_DEBUG_LOCK=0 @@ -69,6 +70,7 @@ while true; do --with-thrift) WITH_THRIFT=1; shift 1 ;; --with-rdma) WITH_RDMA=1; shift 1 ;; --with-mesalink) WITH_MESALINK=1; shift 1 ;; + --with-bthread-tracer) WITH_BTHREAD_TRACER=1; shift 1 ;; --with-debug-bthread-sche-safety ) BRPC_DEBUG_BTHREAD_SCHE_SAFETY=1; shift 1 ;; --with-debug-lock ) BRPC_DEBUG_LOCK=1; shift 1 ;; --nodebugsymbols ) DEBUGSYMBOLS=; shift 1 ;; @@ -352,8 +354,27 @@ fi LEVELDB_HDR=$(find_dir_of_header_or_die leveldb/db.h) -HDRS=$($ECHO "$GFLAGS_HDR\n$PROTOBUF_HDR\n$ABSL_HDR\n$LEVELDB_HDR\n$OPENSSL_HDR" | sort | uniq) -LIBS=$($ECHO "$GFLAGS_LIB\n$PROTOBUF_LIB\n$ABSL_LIB\n$LEVELDB_LIB\n$OPENSSL_LIB\n$SNAPPY_LIB" | 
sort | uniq) +CPPFLAGS= + +if [ $WITH_BTHREAD_TRACER != 0 ]; then + if [ "$SYSTEM" != "Linux" ] || [ "$(uname -m)" != "x86_64" ]; then + >&2 $ECHO "bthread tracer is only supported on Linux x86_64 platform" + exit 1 + fi + LIBUNWIND_HDR=$(find_dir_of_header_or_die libunwind.h) + LIBUNWIND_LIB=$(find_dir_of_lib_or_die unwind) + + CPPFLAGS="${CPPFLAGS} -DBRPC_BTHREAD_TRACER" + + if [ -f "$LIBUNWIND_LIB/libunwind.$SO" ]; then + DYNAMIC_LINKINGS="$DYNAMIC_LINKINGS -lunwind -lunwind-x86_64" + else + STATIC_LINKINGS="$STATIC_LINKINGS -lunwind -lunwind-x86_64" + fi +fi + +HDRS=$($ECHO "$LIBUNWIND_HDR\n$GFLAGS_HDR\n$PROTOBUF_HDR\n$ABSL_HDR\n$LEVELDB_HDR\n$OPENSSL_HDR" | sort | uniq) +LIBS=$($ECHO "$LIBUNWIND_LIB\n$GFLAGS_LIB\n$PROTOBUF_LIB\n$ABSL_LIB\n$LEVELDB_LIB\n$OPENSSL_LIB\n$SNAPPY_LIB" | sort | uniq) absent_in_the_list() { TMP=`$ECHO "$1\n$2" | sort | uniq` @@ -411,7 +432,7 @@ append_to_output "STATIC_LINKINGS=$STATIC_LINKINGS" append_to_output "DYNAMIC_LINKINGS=$DYNAMIC_LINKINGS" # CPP means C PreProcessing, not C PlusPlus -CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS -DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=$BRPC_DEBUG_BTHREAD_SCHE_SAFETY -DBRPC_DEBUG_LOCK=$BRPC_DEBUG_LOCK" +CPPFLAGS="${CPPFLAGS} -DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS -DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=$BRPC_DEBUG_BTHREAD_SCHE_SAFETY -DBRPC_DEBUG_LOCK=$BRPC_DEBUG_LOCK" # Avoid over-optimizations of TLS variables by GCC>=4.8 # See: https://github.com/apache/brpc/issues/1693 diff --git a/docs/cn/bthread_tracer.md b/docs/cn/bthread_tracer.md new file mode 100644 index 0000000000..5eb2ce86ee --- /dev/null +++ b/docs/cn/bthread_tracer.md @@ -0,0 +1,69 @@ +gdb(ptrace)+ gdb_bthread_stack.py主要的缺点是要慢和阻塞进程,需要一种高效的追踪bthread调用栈的方法。 + +bRPC框架的协作式用户态协程无法像Golang内建的抢占式协程一样实现高效的STW(Stop the World),框架也无法干预用户逻辑的执行,所以要追踪bthread调用栈是比较困难的。 + +在线追踪bthread调用栈需要解决以下问题: +1. 追踪挂起bthread的调用栈。 +2. 
追踪运行中bthread的调用栈。 + +# bthread状态模型 + +以下是目前的bthread状态模型。 + +![bthread状态模型](../images/bthread_status_model.svg) + +# 设计方案 + +## 核心思路 + +为了解决上述两个问题,该方案实现了STB(Stop The Bthread),核心思路可以简单总结为,在追踪bthread调用栈的过程中,状态不能流转到当前追踪方法不支持的状态。STB包含了两种追踪模式:上下文(context)追踪模式和信号追踪模式。 + +### 上下文(context)追踪模式 +上下文追踪模式可以追踪挂起bthread的调用栈。挂起的bthread栈是稳定的,利用TaskMeta.stack中保存的上下文信息(x86_64下关键的寄存器主要是RIP、RSP、RBP),通过一些可以回溯指定上下文调用栈的库来追踪bthread调用栈。但是挂起的bthread随时可能会被唤醒,执行逻辑(包括jump_stack),则bthread栈会一直变化。不稳定的上下文是不能用来追踪调用栈的,需要在jump_stack前拦截bthread的调度,等到调用栈追踪完成后才继续运行bthread。所以,上下文追踪模式支持就绪、挂起这两个状态。 + +### 信号追踪模式 + +信号追踪模式可以追踪运行中bthread的调用栈。运行中bthread是不稳定的,不能使用TaskMeta.stack来追踪bthread调用栈。只能另辟蹊径,使用信号中断bthread运行逻辑,在信号处理函数中回溯bthread调用栈。使用信号有两个问题: + +1. 异步信号安全问题。 +2. 信号追踪模式不支持jump_stack。调用栈回溯需要寄存器信息,但jump_stack会操作寄存器,这个过程是不安全的,所以jump_stack不能被信号中断,需要在jump_stack前拦截bthread的调度,等到bthread调用栈追踪完成后才继续挂起bthread。 + +所以,追踪模式只支持运行状态。 + +### 小结 + +jump_stack是bthread挂起或者运行的必经之路,也是STB的拦截点。STB将状态分成三类: +1. 上下文追踪模式的状态:就绪、挂起。 +2. 支持信号追踪模式的状态:运行。 +3. 不支持追踪的状态。jump_stack的过程是不允许使用以上两种调用栈追踪方法,需要在jump_stack前拦截bthread的调度,等到调用栈追踪完成后才继续调度bthread。 + +### 详细流程 + +以下是引入STB后的bthread状态模型,在原来bthread状态模型的基础上,加入两个状态(拦截点):将运行、挂起中。 + +![bthread STB状态模型](../images/bthread_stb_model.svg) + +经过上述分析,总结出STB的流程: + +1. TaskTracer(实现STB的一个模块)收到追踪bthread调用栈的请求时,标识正在追踪。追踪完成后,标识追踪完成,并TaskTracer发信号通知可能处于将运行或者挂起中状态的bthread。根据bthread状态,TaskTracer执行不同的逻辑: +- 创建、就绪但还没分配栈、销毁:直接结束追踪。 +- 挂起、就绪:使用上下文追踪模式追踪bthread的调用栈。 +- 运行:使用信号追踪模式追踪bthread的调用栈。 +- 将运行、挂起中:TaskTracer自旋等到bthread状态流转到下一个状态(挂起或者运行)后继续追踪。 + +2. TaskTracer追踪时,bthread根据状态也会执行不同的逻辑: +- 创建、就绪但还没分配栈、就绪:不需要额外处理。 +- 挂起、运行:通知TaskTracer继续追踪。 +- 将运行、挂起中、销毁:bthread通过条件变量等到TaskTracer追踪完成。TaskTracer追踪完成后会通过条件变量通知bthread继续执行jump_stack。 + +# 使用方法 + +1. 下载安装libunwind。 +2. 给config_brpc.sh增加`--with-bthread-tracer`选项或者给cmake增加`-DWITH_BTHREAD_TRACER=ON`选项。 +3. 访问服务的内置服务:`http://ip:port/bthreads/?st=1`或者代码里调用`bthread::stack_trace()`函数。 +4. 
如果希望追踪pthread的调用栈,在对应pthread上调用`bthread::init_for_pthread_stack_trace()`函数获取一个伪bthread_t,然后使用步骤3即可获取pthread调用栈。 + +# 相关flag + +- `enable_fast_unwind`:是否启用快速回溯功能,默认为true。大多数情况下,不需要关闭快速回溯功能。除非你关注的调用栈函数名转换失败,显示为``,则可以尝试关闭快速回溯功能,但这会导致性能下降。以包含30帧的调用栈举例,快速回溯只需要400~500us,而关闭快速回溯则需要4ms左右,性能下降了近10倍。 +- `signal_trace_timeout_ms`:信号追踪模式的超时时间,默认为50ms。虽然libunwind文档显示回溯功能是异步信号安全的,但是[gpertools社区发现libunwind在某些情况下会死锁](https://github.com/gperftools/gperftools/issues/775),所以TaskTracer会设置了超时时间,超时后会放弃回溯,打破死锁。 \ No newline at end of file diff --git a/docs/cn/getting_started.md b/docs/cn/getting_started.md index 13f8480512..e027e53842 100644 --- a/docs/cn/getting_started.md +++ b/docs/cn/getting_started.md @@ -364,7 +364,7 @@ pb 3.x中的Arena至今没被支持。 ## tcmalloc: 1.7-2.5 -brpc默认**不**链接 [tcmalloc](http://goog-perftools.sourceforge.net/doc/tcmalloc.html)。用户按需要链接tcmalloc。 +bRPC默认**不**链接 [tcmalloc](http://goog-perftools.sourceforge.net/doc/tcmalloc.html)。用户按需要链接tcmalloc。 和glibc内置的ptmalloc相比,tcmalloc通常能提升性能。然而不同版本的tcmalloc可能表现迥异。例如:tcmalloc 2.1与 tcmalloc 1.7和2.5相比,可能会让brpc的多线程样例性能显著恶化(tcmalloc中的一个自旋锁导致的)。甚至不同的小版本号之间变现也可能不同。当你的程序表现不符合预期的时候,移除tcmalloc然后尝试其他版本。 @@ -382,7 +382,7 @@ brpc默认**不**链接 [tcmalloc](http://goog-perftools.sourceforge.net/doc/tcm ## glog: 3.3+ -brpc实现了一个默认的[日志功能](../../src/butil/logging.h)它和glog冲突。要替换成glog,可以给config_brpc.sh增加*--with-glog*选项或者给cmake增加`-DWITH_GLOG=ON`选项。 +brpc实现了一个默认的[日志功能](../../src/butil/logging.h)它和glog冲突。要替换成glog,可以给config_brpc.sh增加`--with-glog`选项或者给cmake增加`-DWITH_GLOG=ON`选项。 ## valgrind: 3.8+ @@ -392,6 +392,12 @@ brpc会自动检测valgrind(然后注册bthread的栈)。不支持老版本 无已知问题。 +## libunwind: 1.3-1.8.1 + +bRPC默认**不**链接 [libunwind](https://github.com/libunwind/libunwind)。用户需要追踪bthread功能则链接libunwind,可以给config_brpc.sh增加*--with-bthread-tracer*选项或者给cmake增加`-DWITH_BTHREAD_TRACER=ON`选项。 + +建议使用最新版本的libunwind。 + # 实例追踪 我们提供了一个程序去帮助你追踪和监控所有brpc实例。 只需要在某处运行 [trackme_server](https://github.com/apache/brpc/tree/master/tools/trackme_server/) 然后再带着 
-trackme_server=SERVER参数启动需要被追踪的实例。trackme_server将从实例周期性地收到ping消息然后打印日志。您可以从日志中聚合实例地址,并调用实例的内置服务以获取更多信息。 diff --git a/docs/en/getting_started.md b/docs/en/getting_started.md index a819762047..a2ae80d47c 100644 --- a/docs/en/getting_started.md +++ b/docs/en/getting_started.md @@ -377,7 +377,7 @@ When you remove tcmalloc, not only remove the linkage with tcmalloc but also the ## glog: 3.3+ -brpc implements a default [logging utility](../../src/butil/logging.h) which conflicts with glog. To replace this with glog, add *--with-glog* to config_brpc.sh or add `-DWITH_GLOG=ON` to cmake. +brpc implements a default [logging utility](../../src/butil/logging.h) which conflicts with glog. To replace this with glog, add `--with-glog` to config_brpc.sh or add `-DWITH_GLOG=ON` to cmake. ## valgrind: 3.8+ @@ -385,6 +385,12 @@ brpc detects valgrind automatically (and registers stacks of bthread). Older val ## thrift: 0.9.3-0.11.0 +## libunwind: 1.3-1.8.1 + +brpc does **not** link [libunwind](https://github.com/libunwind/libunwind) by default. Users link libunwind on-demand by adding `--with-glog` to config_brpc.sh or adding `-DWITH_GLOG=ON` to cmake. + +It is recommended to use the latest possible version of libunwind. + no known issues. # Track instances diff --git a/docs/images/bthread_status_model.svg b/docs/images/bthread_status_model.svg new file mode 100644 index 0000000000..9bd5edf52a --- /dev/null +++ b/docs/images/bthread_status_model.svg @@ -0,0 +1,3 @@ + + +
创建
创建
就绪(调度队列)
就绪(调度队列)
运行中
运行中
挂起
挂起
销毁
销毁
sched_to
sched_...
yield/wakeup/timeout/interrupt
yield/wakeup/timeout/interrupt
sched_to
sched_to
start_urgent(sched_to)
start_urgent(sched_to)
butex/sleep
butex/sleep
start_background/start_urgent
start_...
jump_stack
jump_stack
\ No newline at end of file diff --git a/docs/images/bthread_stb_model.svg b/docs/images/bthread_stb_model.svg new file mode 100644 index 0000000000..85176c3d64 --- /dev/null +++ b/docs/images/bthread_stb_model.svg @@ -0,0 +1,3 @@ + + +
创建
创建
就绪(调度队列)
就绪(调度队列)
将运行(拦截点
将运行(拦截点)
运行
运行
挂起
挂起
挂起中(拦截点
挂起中(拦截点)
销毁
销毁
sched_to
sched_...
yield/wakeup/timeout/interrupt
yield/wakeup/timeout/interrupt
sched_to
sched_to
start_urgent
start_...
butex/sleep
butex/sleep
start_background/start_urgent
start_...
jump_stack
jump_stack
sched_to
sched_...
\ No newline at end of file diff --git a/src/brpc/builtin/bthreads_service.cpp b/src/brpc/builtin/bthreads_service.cpp index 209f7a9934..f65ff347f0 100644 --- a/src/brpc/builtin/bthreads_service.cpp +++ b/src/brpc/builtin/bthreads_service.cpp @@ -24,6 +24,9 @@ namespace bthread { void print_task(std::ostream& os, bthread_t tid); +#ifdef BRPC_BTHREAD_TRACER +void stack_trace(std::ostream& os, bthread_t tid); +#endif // BRPC_BTHREAD_TRACER } @@ -40,12 +43,25 @@ void BthreadsService::default_method(::google::protobuf::RpcController* cntl_bas const std::string& constraint = cntl->http_request().unresolved_path(); if (constraint.empty()) { +#ifdef BRPC_BTHREAD_TRACER + os << "Use /bthreads/ or /bthreads/?st=1 for stack trace"; +#else os << "Use /bthreads/"; +#endif // BRPC_BTHREAD_TRACER } else { char* endptr = NULL; bthread_t tid = strtoull(constraint.c_str(), &endptr, 10); - if (*endptr == '\0' || *endptr == '/') { + if (*endptr == '\0' || *endptr == '/' || *endptr == '?') { ::bthread::print_task(os, tid); + +#ifdef BRPC_BTHREAD_TRACER + const std::string* st = cntl->http_request().uri().GetQuery("st"); + LOG(INFO) << "st=" << (st ? 
*st : "NULL"); + if (NULL != st && *st == "1") { + os << "\nbthread call stack:\n"; + ::bthread::stack_trace(os, tid); + } +#endif // BRPC_BTHREAD_TRACER } else { cntl->SetFailed(ENOMETHOD, "path=%s is not a bthread id", constraint.c_str()); diff --git a/src/bthread/bthread.cpp b/src/bthread/bthread.cpp index bf2c24f819..6f0b7d3108 100644 --- a/src/bthread/bthread.cpp +++ b/src/bthread/bthread.cpp @@ -19,9 +19,11 @@ // Date: Tue Jul 10 17:40:58 CST 2012 +#include #include #include "butil/macros.h" // BAIDU_CASSERT #include "butil/logging.h" +#include "butil/thread_local.h" #include "bthread/task_group.h" // TaskGroup #include "bthread/task_control.h" // TaskControl #include "bthread/timer_thread.h" @@ -115,6 +117,79 @@ inline TaskControl* get_or_new_task_control() { return c; } +#ifdef BRPC_BTHREAD_TRACER +BAIDU_THREAD_LOCAL TaskMeta* pthread_fake_meta = NULL; + +bthread_t init_for_pthread_stack_trace() { + if (NULL != pthread_fake_meta) { + return pthread_fake_meta->tid; + } + + TaskControl* c = get_task_control(); + if (NULL == c) { + LOG(ERROR) << "TaskControl has not been created, " + "please use bthread_start_xxx before call this function"; + return INVALID_BTHREAD; + } + + butil::ResourceId slot; + pthread_fake_meta = butil::get_resource(&slot); + if (BAIDU_UNLIKELY(NULL == pthread_fake_meta)) { + LOG(ERROR) << "Fail to get TaskMeta"; + return INVALID_BTHREAD; + } + + pthread_fake_meta->attr = BTHREAD_ATTR_PTHREAD; + pthread_fake_meta->tid = make_tid(*pthread_fake_meta->version_butex, slot); + // Make TaskTracer use signal trace mode for pthread. + c->_task_tracer.set_running_status(syscall(SYS_gettid), pthread_fake_meta); + + // Release the TaskMeta at exit of pthread. + butil::thread_atexit([]() { + // Similar to TaskGroup::task_runner. + bool tracing; + { + BAIDU_SCOPED_LOCK(pthread_fake_meta->version_lock); + tracing = TaskTracer::set_end_status_unsafe(pthread_fake_meta); + // If resulting version is 0, + // change it to 1 to make bthread_t never be 0. 
+ if (0 == ++*pthread_fake_meta->version_butex) { + ++*pthread_fake_meta->version_butex; + } + } + + if (tracing) { + // Wait for tracing completion. + get_task_control()->_task_tracer.WaitForTracing(pthread_fake_meta); + } + get_task_control()->_task_tracer.set_status( + TASK_STATUS_UNKNOWN, pthread_fake_meta); + + butil::return_resource(get_slot(pthread_fake_meta->tid)); + pthread_fake_meta = NULL; + }); + + return pthread_fake_meta->tid; +} + +void stack_trace(std::ostream& os, bthread_t tid) { + TaskControl* c = get_task_control(); + if (NULL == c) { + os << "TaskControl has not been created"; + return; + } + c->stack_trace(os, tid); +} + +std::string stack_trace(bthread_t tid) { + TaskControl* c = get_task_control(); + if (NULL == c) { + return "TaskControl has not been created"; + } + return c->stack_trace(tid); +} +#endif // BRPC_BTHREAD_TRACER + static int add_workers_for_each_tag(int num) { int added = 0; auto c = get_task_control(); diff --git a/src/bthread/bthread.h b/src/bthread/bthread.h index 8532b3b322..5055cb237b 100644 --- a/src/bthread/bthread.h +++ b/src/bthread/bthread.h @@ -28,12 +28,27 @@ #include "bthread/errno.h" #if defined(__cplusplus) -# include -# include "bthread/mutex.h" // use bthread_mutex_t in the RAII way -#endif +#include +#include "bthread/mutex.h" // use bthread_mutex_t in the RAII way +#endif // __cplusplus #include "bthread/id.h" +#if defined(__cplusplus) && defined(BRPC_BTHREAD_TRACER) +#include "butil/resource_pool.h" +#include "bthread/task_meta.h" +namespace bthread { +// Assign a TaskMeta to the pthread and set the state to Running, +// so that `stack_trace()' can trace the call stack of the pthread. +bthread_t init_for_pthread_stack_trace(); + +// Trace the call stack of the bthread, or pthread which has been +// initialized by `init_for_pthread_stack_trace()'. 
+void stack_trace(std::ostream& os, bthread_t tid); +std::string stack_trace(bthread_t tid); +} // namespace bthread +#endif // __cplusplus && BRPC_BTHREAD_TRACER + __BEGIN_DECLS // Create bthread `fn(args)' with attributes `attr' and put the identifier into diff --git a/src/bthread/butex.cpp b/src/bthread/butex.cpp index 25cb953924..c54198c6cf 100644 --- a/src/bthread/butex.cpp +++ b/src/bthread/butex.cpp @@ -283,11 +283,11 @@ inline TaskGroup* get_task_group(TaskControl* c, bthread_tag_t tag) { return is_same_tag(tag) ? tls_task_group : c->choose_one_group(tag); } -inline void run_in_local_task_group(TaskGroup* g, bthread_t tid, bool nosignal) { +inline void run_in_local_task_group(TaskGroup* g, TaskMeta* next_meta, bool nosignal) { if (!nosignal) { - TaskGroup::exchange(&g, tid); + TaskGroup::exchange(&g, next_meta); } else { - g->ready_to_run(tid, nosignal); + g->ready_to_run(next_meta, nosignal); } } @@ -311,9 +311,9 @@ int butex_wake(void* arg, bool nosignal) { unsleep_if_necessary(bbw, get_global_timer_thread()); TaskGroup* g = get_task_group(bbw->control, bbw->tag); if (g == tls_task_group) { - run_in_local_task_group(g, bbw->tid, nosignal); + run_in_local_task_group(g, bbw->task_meta, nosignal); } else { - g->ready_to_run_remote(bbw->tid, check_nosignal(nosignal, g->tag())); + g->ready_to_run_remote(bbw->task_meta, check_nosignal(nosignal, g->tag())); } return 1; } @@ -363,7 +363,7 @@ int butex_wake_n(void* arg, size_t n, bool nosignal) { w->RemoveFromList(); unsleep_if_necessary(w, get_global_timer_thread()); auto g = get_task_group(w->control, w->tag); - g->ready_to_run_general(w->tid, true); + g->ready_to_run_general(w->task_meta, true); nwakeups[g->tag()] = g; ++nwakeup; } @@ -375,9 +375,9 @@ int butex_wake_n(void* arg, size_t n, bool nosignal) { } auto g = get_task_group(next->control, next->tag); if (g == tls_task_group) { - run_in_local_task_group(g, next->tid, nosignal); + run_in_local_task_group(g, next->task_meta, nosignal); } else { - 
g->ready_to_run_remote(next->tid, check_nosignal(nosignal, g->tag())); + g->ready_to_run_remote(next->task_meta, check_nosignal(nosignal, g->tag())); } return nwakeup; } @@ -436,7 +436,7 @@ int butex_wake_except(void* arg, bthread_t excluded_bthread) { w->RemoveFromList(); unsleep_if_necessary(w, get_global_timer_thread()); auto g = get_task_group(w->control, w->tag); - g->ready_to_run_general(w->tid, true); + g->ready_to_run_general(w->task_meta, true); nwakeups[g->tag()] = g; ++nwakeup; } while (!bthread_waiters.empty()); @@ -480,9 +480,9 @@ int butex_requeue(void* arg, void* arg2) { unsleep_if_necessary(bbw, get_global_timer_thread()); auto g = is_same_tag(bbw->tag) ? tls_task_group : NULL; if (g) { - TaskGroup::exchange(&g, front->tid); + TaskGroup::exchange(&g, bbw->task_meta); } else { - bbw->control->choose_one_group(bbw->tag)->ready_to_run_remote(front->tid); + bbw->control->choose_one_group(bbw->tag)->ready_to_run_remote(bbw->task_meta); } return 1; } @@ -520,7 +520,7 @@ inline bool erase_from_butex(ButexWaiter* bw, bool wakeup, WaiterState state) { if (erased && wakeup) { if (bw->tid) { ButexBthreadWaiter* bbw = static_cast(bw); - get_task_group(bbw->control, bbw->tag)->ready_to_run_general(bw->tid); + get_task_group(bbw->control, bbw->tag)->ready_to_run_general(bbw->task_meta); } else { ButexPthreadWaiter* pw = static_cast(bw); wakeup_pthread(pw); @@ -535,7 +535,7 @@ struct WaitForButexArgs { bool prepend; }; -static void wait_for_butex(void* arg) { +void wait_for_butex(void* arg) { auto args = static_cast(arg); ButexBthreadWaiter* const bw = args->bw; Butex* const b = bw->initial_butex; @@ -565,6 +565,9 @@ static void wait_for_butex(void* arg) { b->waiters.Append(bw); } bw->container.store(b, butil::memory_order_relaxed); +#ifdef BRPC_BTHREAD_TRACER + bw->control->_task_tracer.set_status(TASK_STATUS_SUSPENDED, bw->task_meta); +#endif // BRPC_BTHREAD_TRACER if (bw->abstime != NULL) { bw->sleep_id = get_global_timer_thread()->schedule( 
erase_from_butex_and_wakeup, bw, *bw->abstime); @@ -582,7 +585,7 @@ static void wait_for_butex(void* arg) { // the two functions. The on-stack ButexBthreadWaiter is safe to use and // bw->waiter_state will not change again. // unsleep_if_necessary(bw, get_global_timer_thread()); - tls_task_group->ready_to_run(bw->tid); + tls_task_group->ready_to_run(bw->task_meta); // FIXME: jump back to original thread is buggy. // // Value unmatched or waiter is already woken up by TimerThread, jump @@ -697,7 +700,7 @@ int butex_wait(void* arg, int expected_value, const timespec* abstime, bool prep // release fence matches with acquire fence in interrupt_and_consume_waiters // in task_group.cpp to guarantee visibility of `interrupted'. bbw.task_meta->current_waiter.store(&bbw, butil::memory_order_release); - WaitForButexArgs args{ &bbw, prepend}; + WaitForButexArgs args{ &bbw, prepend }; g->set_remained(wait_for_butex, &args); TaskGroup::sched(&g); diff --git a/src/bthread/execution_queue.cpp b/src/bthread/execution_queue.cpp index 557669ee05..bb01882cb2 100644 --- a/src/bthread/execution_queue.cpp +++ b/src/bthread/execution_queue.cpp @@ -68,7 +68,7 @@ inline ExecutionQueueVars* get_execq_vars() { void ExecutionQueueBase::start_execute(TaskNode* node) { node->next = TaskNode::UNCONNECTED; - node->status = UNEXECUTED; + node->status = TaskNode::UNEXECUTED; node->iterated = false; if (node->high_priority) { // Add _high_priority_tasks before pushing this task into queue to @@ -304,7 +304,7 @@ int ExecutionQueueBase::_execute(TaskNode* head, bool high_priority, int* nitera if (head != NULL && head->stop_task) { CHECK(head->next == NULL); head->iterated = true; - head->status = EXECUTED; + head->status = TaskNode::EXECUTED; TaskIteratorBase iter(NULL, this, true, false); _execute_func(_meta, _type_specific_function, iter); if (niterated) { diff --git a/src/bthread/execution_queue_inl.h b/src/bthread/execution_queue_inl.h index 6472505187..f5998a2628 100644 --- 
a/src/bthread/execution_queue_inl.h +++ b/src/bthread/execution_queue_inl.h @@ -38,17 +38,17 @@ struct ExecutionQueueId { uint64_t value; }; -enum TaskStatus { - UNEXECUTED = 0, - EXECUTING = 1, - EXECUTED = 2 -}; - struct TaskNode; class ExecutionQueueBase; typedef void (*clear_task_mem)(TaskNode*); struct BAIDU_CACHELINE_ALIGNMENT TaskNode { + enum TaskStatus { + UNEXECUTED = 0, + EXECUTING = 1, + EXECUTED = 2 + }; + TaskNode() : version(0) , status(UNEXECUTED) diff --git a/src/bthread/task_control.cpp b/src/bthread/task_control.cpp index d0549ea9e8..707e1adbf2 100644 --- a/src/bthread/task_control.cpp +++ b/src/bthread/task_control.cpp @@ -19,6 +19,7 @@ // Date: Tue Jul 10 17:40:58 CST 2012 +#include // SYS_gettid #include "butil/scoped_lock.h" // BAIDU_SCOPED_LOCK #include "butil/errno.h" // berror #include "butil/logging.h" @@ -88,14 +89,19 @@ void* TaskControl::worker_thread(void* arg) { LOG(ERROR) << "Fail to create TaskGroup in pthread=" << pthread_self(); return NULL; } + + g->_tid = syscall(SYS_gettid); + std::string worker_thread_name = butil::string_printf( - "brpc_wkr:%d-%d", g->tag(), c->_next_worker_id.fetch_add(1, butil::memory_order_relaxed)); + "brpc_wkr:%d-%d", g->tag(), + c->_next_worker_id.fetch_add(1, butil::memory_order_relaxed)); butil::PlatformThread::SetName(worker_thread_name.c_str()); - BT_VLOG << "Created worker=" << pthread_self() << " bthread=" << g->main_tid() - << " tag=" << g->tag(); + BT_VLOG << "Created worker=" << pthread_self() << " tid=" << g->_tid + << " bthread=" << g->main_tid() << " tag=" << g->tag(); tls_task_group = g; c->_nworkers << 1; c->tag_nworkers(g->tag()) << 1; + g->run_main_task(); stat = g->main_stat(); @@ -208,6 +214,13 @@ int TaskControl::init(int concurrency) { LOG(ERROR) << "Fail to get global_timer_thread"; return -1; } + +#ifdef BRPC_BTHREAD_TRACER + if (_task_tracer.Init() != 0) { + LOG(ERROR) << "Fail to init TaskTracer"; + return -1; + } +#endif // BRPC_BTHREAD_TRACER _workers.resize(_concurrency); for 
(int i = 0; i < _concurrency; ++i) { @@ -281,6 +294,16 @@ TaskGroup* TaskControl::choose_one_group(bthread_tag_t tag) { return NULL; } +#ifdef BRPC_BTHREAD_TRACER +void TaskControl::stack_trace(std::ostream& os, bthread_t tid) { + _task_tracer.Trace(os, tid); +} + +std::string TaskControl::stack_trace(bthread_t tid) { + return _task_tracer.Trace(tid); +} +#endif // BRPC_BTHREAD_TRACER + extern int stop_and_join_epoll_threads(); void TaskControl::stop_and_join() { @@ -301,13 +324,19 @@ void TaskControl::stop_and_join() { pl.stop(); } } - // Interrupt blocking operations. - for (size_t i = 0; i < _workers.size(); ++i) { - interrupt_pthread(_workers[i]); + + for (auto worker: _workers) { + // Interrupt blocking operations. +#ifdef BRPC_BTHREAD_TRACER + // TaskRacer has registered signal handler for SIGURG. + pthread_kill(worker, SIGURG); +#else + interrupt_pthread(worker); +#endif // BRPC_BTHREAD_TRACER } // Join workers - for (size_t i = 0; i < _workers.size(); ++i) { - pthread_join(_workers[i], NULL); + for (auto worker : _workers) { + pthread_join(worker, NULL); } } diff --git a/src/bthread/task_control.h b/src/bthread/task_control.h index 125980797a..95820a86f9 100644 --- a/src/bthread/task_control.h +++ b/src/bthread/task_control.h @@ -25,14 +25,15 @@ #ifndef NDEBUG #include // std::ostream #endif +#include #include // size_t #include #include #include #include "butil/atomicops.h" // butil::atomic #include "bvar/bvar.h" // bvar::PassiveStatus +#include "bthread/task_tracer.h" #include "bthread/task_meta.h" // TaskMeta -#include "butil/resource_pool.h" // ResourcePool #include "bthread/work_stealing_queue.h" // WorkStealingQueue #include "bthread/parking_lot.h" @@ -43,7 +44,11 @@ class TaskGroup; // Control all task groups class TaskControl { - friend class TaskGroup; +friend class TaskGroup; +friend void wait_for_butex(void*); +#ifdef BRPC_BTHREAD_TRACER +friend bthread_t init_for_pthread_stack_trace(); +#endif // BRPC_BTHREAD_TRACER public: TaskControl(); @@ 
-86,6 +91,12 @@ class TaskControl { // If this method is called after init(), it never returns NULL. TaskGroup* choose_one_group(bthread_tag_t tag); +#ifdef BRPC_BTHREAD_TRACER + // A stacktrace of bthread can be helpful in debugging. + void stack_trace(std::ostream& os, bthread_t tid); + std::string stack_trace(bthread_t tid); +#endif // BRPC_BTHREAD_TRACER + private: typedef std::array TaggedGroups; static const int PARKING_LOT_NUM = 4; @@ -144,6 +155,11 @@ class TaskControl { std::vector*> _tagged_nbthreads; std::vector _pl; + +#ifdef BRPC_BTHREAD_TRACER + TaskTracer _task_tracer; +#endif // BRPC_BTHREAD_TRACER + }; inline bvar::LatencyRecorder& TaskControl::exposed_pending_time() { diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp index bab6469001..23333d4388 100644 --- a/src/bthread/task_group.cpp +++ b/src/bthread/task_group.cpp @@ -35,7 +35,6 @@ #include "bthread/task_control.h" #include "bthread/task_group.h" #include "bthread/timer_thread.h" -#include "bthread/errno.h" namespace bthread { @@ -45,8 +44,7 @@ static const bthread_attr_t BTHREAD_ATTR_TASKGROUP = { static bool pass_bool(const char*, bool) { return true; } DEFINE_bool(show_bthread_creation_in_vars, false, "When this flags is on, The time " - "from bthread creation to first run will be recorded and shown " - "in /vars"); + "from bthread creation to first run will be recorded and shown in /vars"); const bool ALLOW_UNUSED dummy_show_bthread_creation_in_vars = ::GFLAGS_NS::RegisterFlagValidator(&FLAGS_show_bthread_creation_in_vars, pass_bool); @@ -183,8 +181,7 @@ void TaskGroup::run_main_task() { } TaskGroup::TaskGroup(TaskControl* c) - : - _cur_meta(NULL) + : _cur_meta(NULL) , _control(c) , _num_nosignal(0) , _nsignaled(0) @@ -202,7 +199,7 @@ TaskGroup::TaskGroup(TaskControl* c) , _sched_recursive_guard(0) #endif , _tag(BTHREAD_TAG_DEFAULT) -{ + , _tid(-1) { _steal_seed = butil::fast_rand(); _steal_offset = OFFSET_TABLE[_steal_seed % ARRAY_SIZE(OFFSET_TABLE)]; CHECK(c); @@ -262,6 
+259,9 @@ void TaskGroup::task_runner(intptr_t skip_remained) { // NOTE: tls_task_group is volatile since tasks are moved around // different groups. TaskGroup* g = tls_task_group; +#ifdef BRPC_BTHREAD_TRACER + TaskTracer::set_running_status(g->tid(), g->_cur_meta); +#endif // BRPC_BTHREAD_TRACER if (!skip_remained) { while (g->_last_context_remained) { @@ -337,14 +337,28 @@ void TaskGroup::task_runner(intptr_t skip_remained) { // is 0, change it to 1 to make bthread_t never be 0. Any access // or join to the bthread after changing version will be rejected. // The spinlock is for visibility of TaskGroup::get_attr. +#ifdef BRPC_BTHREAD_TRACER + bool tracing = false; +#endif // BRPC_BTHREAD_TRACER { BAIDU_SCOPED_LOCK(m->version_lock); +#ifdef BRPC_BTHREAD_TRACER + tracing = TaskTracer::set_end_status_unsafe(m); +#endif // BRPC_BTHREAD_TRACER if (0 == ++*m->version_butex) { ++*m->version_butex; } } butex_wake_except(m->version_butex, 0); +#ifdef BRPC_BTHREAD_TRACER + if (tracing) { + // Wait for tracing completion. + g->_control->_task_tracer.WaitForTracing(m); + } + g->_control->_task_tracer.set_status(TASK_STATUS_UNKNOWN, m); +#endif // BRPC_BTHREAD_TRACER + g->_control->_nbthreads << -1; g->_control->tag_nbthreads(g->tag()) << -1; g->set_remained(TaskGroup::_release_last_context, m); @@ -379,7 +393,7 @@ int TaskGroup::start_foreground(TaskGroup** pg, const bthread_attr_t using_attr = (attr ? 
*attr : BTHREAD_ATTR_NORMAL); butil::ResourceId slot; TaskMeta* m = butil::get_resource(&slot); - if (__builtin_expect(!m, 0)) { + if (BAIDU_UNLIKELY(NULL == m)) { return ENOMEM; } CHECK(m->current_waiter.load(butil::memory_order_relaxed) == NULL); @@ -406,9 +420,12 @@ int TaskGroup::start_foreground(TaskGroup** pg, TaskGroup* g = *pg; g->_control->_nbthreads << 1; g->_control->tag_nbthreads(g->tag()) << 1; +#ifdef BRPC_BTHREAD_TRACER + g->_control->_task_tracer.set_status(TASK_STATUS_CREATED, m); +#endif // BRPC_BTHREAD_TRACER if (g->is_current_pthread_task()) { // never create foreground task in pthread. - g->ready_to_run(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL)); + g->ready_to_run(m, using_attr.flags & BTHREAD_NOSIGNAL); } else { // NOSIGNAL affects current task, not the new task. RemainedFn fn = NULL; @@ -417,10 +434,7 @@ int TaskGroup::start_foreground(TaskGroup** pg, } else { fn = ready_to_run_in_worker; } - ReadyToRunArgs args = { - g->current_tid(), - (bool)(using_attr.flags & BTHREAD_NOSIGNAL) - }; + ReadyToRunArgs args = { g->_cur_meta, (bool)(using_attr.flags & BTHREAD_NOSIGNAL) }; g->set_remained(fn, &args); TaskGroup::sched_to(pg, m->tid); } @@ -439,7 +453,7 @@ int TaskGroup::start_background(bthread_t* __restrict th, const bthread_attr_t using_attr = (attr ? 
*attr : BTHREAD_ATTR_NORMAL); butil::ResourceId slot; TaskMeta* m = butil::get_resource(&slot); - if (__builtin_expect(!m, 0)) { + if (BAIDU_UNLIKELY(NULL == m)) { return ENOMEM; } CHECK(m->current_waiter.load(butil::memory_order_relaxed) == NULL); @@ -464,10 +478,13 @@ int TaskGroup::start_background(bthread_t* __restrict th, } _control->_nbthreads << 1; _control->tag_nbthreads(tag()) << 1; +#ifdef BRPC_BTHREAD_TRACER + _control->_task_tracer.set_status(TASK_STATUS_CREATED, m); +#endif // BRPC_BTHREAD_TRACER if (REMOTE) { - ready_to_run_remote(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL)); + ready_to_run_remote(m, (using_attr.flags & BTHREAD_NOSIGNAL)); } else { - ready_to_run(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL)); + ready_to_run(m, (using_attr.flags & BTHREAD_NOSIGNAL)); } return 0; } @@ -489,7 +506,7 @@ int TaskGroup::join(bthread_t tid, void** return_value) { return EINVAL; } TaskMeta* m = address_meta(tid); - if (__builtin_expect(!m, 0)) { + if (BAIDU_UNLIKELY(NULL == m)) { // The bthread is not created yet, this join is definitely wrong. return EINVAL; } @@ -625,9 +642,16 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* next_meta) { if (cur_meta->stack != NULL) { if (next_meta->stack != cur_meta->stack) { CheckBthreadScheSafety(); +#ifdef BRPC_BTHREAD_TRACER + g->_control->_task_tracer.set_status(TASK_STATUS_JUMPING, cur_meta); + g->_control->_task_tracer.set_status(TASK_STATUS_JUMPING, next_meta); +#endif // BRPC_BTHREAD_TRACER jump_stack(cur_meta->stack, next_meta->stack); // probably went to another group, need to assign g again. 
g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group); +#ifdef BRPC_BTHREAD_TRACER + TaskTracer::set_running_status(g->tid(), g->_cur_meta); +#endif // BRPC_BTHREAD_TRACER } #ifndef NDEBUG else { @@ -669,8 +693,12 @@ void TaskGroup::destroy_self() { } } -void TaskGroup::ready_to_run(bthread_t tid, bool nosignal) { - push_rq(tid); + +void TaskGroup::ready_to_run(TaskMeta* meta, bool nosignal) { +#ifdef BRPC_BTHREAD_TRACER + _control->_task_tracer.set_status(TASK_STATUS_READY, meta); +#endif // BRPC_BTHREAD_TRACER + push_rq(meta->tid); if (nosignal) { ++_num_nosignal; } else { @@ -690,9 +718,12 @@ void TaskGroup::flush_nosignal_tasks() { } } -void TaskGroup::ready_to_run_remote(bthread_t tid, bool nosignal) { +void TaskGroup::ready_to_run_remote(TaskMeta* meta, bool nosignal) { +#ifdef BRPC_BTHREAD_TRACER + _control->_task_tracer.set_status(TASK_STATUS_READY, meta); +#endif // BRPC_BTHREAD_TRACER _remote_rq._mutex.lock(); - while (!_remote_rq.push_locked(tid)) { + while (!_remote_rq.push_locked(meta->tid)) { flush_nosignal_tasks_remote_locked(_remote_rq._mutex); LOG_EVERY_SECOND(ERROR) << "_remote_rq is full, capacity=" << _remote_rq.capacity(); @@ -723,11 +754,11 @@ void TaskGroup::flush_nosignal_tasks_remote_locked(butil::Mutex& locked_mutex) { _control->signal_task(val, _tag); } -void TaskGroup::ready_to_run_general(bthread_t tid, bool nosignal) { +void TaskGroup::ready_to_run_general(TaskMeta* meta, bool nosignal) { if (tls_task_group == this) { - return ready_to_run(tid, nosignal); + return ready_to_run(meta, nosignal); } - return ready_to_run_remote(tid, nosignal); + return ready_to_run_remote(meta, nosignal); } void TaskGroup::flush_nosignal_tasks_general() { @@ -739,12 +770,16 @@ void TaskGroup::flush_nosignal_tasks_general() { void TaskGroup::ready_to_run_in_worker(void* args_in) { ReadyToRunArgs* args = static_cast(args_in); - return tls_task_group->ready_to_run(args->tid, args->nosignal); + return tls_task_group->ready_to_run(args->meta, args->nosignal); } 
void TaskGroup::ready_to_run_in_worker_ignoresignal(void* args_in) { ReadyToRunArgs* args = static_cast(args_in); - return tls_task_group->push_rq(args->tid); +#ifdef BRPC_BTHREAD_TRACER + tls_task_group->_control->_task_tracer.set_status( + TASK_STATUS_READY, args->meta); +#endif // BRPC_BTHREAD_TRACER + return tls_task_group->push_rq(args->meta->tid); } struct SleepArgs { @@ -759,7 +794,7 @@ static void ready_to_run_from_timer_thread(void* arg) { const SleepArgs* e = static_cast(arg); auto g = e->group; auto tag = g->tag(); - g->control()->choose_one_group(tag)->ready_to_run_remote(e->tid); + g->control()->choose_one_group(tag)->ready_to_run_remote(e->meta); } void TaskGroup::_add_sleep_event(void* void_args) { @@ -768,6 +803,9 @@ void TaskGroup::_add_sleep_event(void* void_args) { // will be gone. SleepArgs e = *static_cast(void_args); TaskGroup* g = e.group; +#ifdef BRPC_BTHREAD_TRACER + g->_control->_task_tracer.set_status(TASK_STATUS_SUSPENDED, e.meta); +#endif // BRPC_BTHREAD_TRACER TimerThread::TaskId sleep_id; sleep_id = get_global_timer_thread()->schedule( @@ -777,7 +815,7 @@ void TaskGroup::_add_sleep_event(void* void_args) { if (!sleep_id) { e.meta->sleep_failed = true; // Fail to schedule timer, go back to previous thread. - g->ready_to_run(e.tid); + g->ready_to_run(e.meta); return; } @@ -801,7 +839,7 @@ void TaskGroup::_add_sleep_event(void* void_args) { // schedule previous thread as well. If sleep_id does not exist, // previous thread is scheduled by timer thread before and we don't // have to do it again. 
- g->ready_to_run(e.tid); + g->ready_to_run(e.meta); } } @@ -904,12 +942,12 @@ int TaskGroup::interrupt(bthread_t tid, TaskControl* c, bthread_tag_t tag) { if (get_global_timer_thread()->unschedule(sleep_id) == 0) { bthread::TaskGroup* g = bthread::tls_task_group; if (g) { - g->ready_to_run(tid); + g->ready_to_run(TaskGroup::address_meta(tid)); } else { if (!c) { return EINVAL; } - c->choose_one_group(tag)->ready_to_run_remote(tid); + c->choose_one_group(tag)->ready_to_run_remote(TaskGroup::address_meta(tid)); } } } @@ -918,7 +956,7 @@ int TaskGroup::interrupt(bthread_t tid, TaskControl* c, bthread_tag_t tag) { void TaskGroup::yield(TaskGroup** pg) { TaskGroup* g = *pg; - ReadyToRunArgs args = { g->current_tid(), false }; + ReadyToRunArgs args = { g->_cur_meta, false }; g->set_remained(ready_to_run_in_worker, &args); sched(pg); } @@ -940,6 +978,9 @@ void print_task(std::ostream& os, bthread_t tid) { bool has_tls = false; int64_t cpuwide_start_ns = 0; TaskStatistics stat = {0, 0}; + TaskStatus status = TASK_STATUS_UNKNOWN; + bool traced = false; + pid_t worker_tid = 0; { BAIDU_SCOPED_LOCK(m->version_lock); if (given_ver == *m->version_butex) { @@ -953,6 +994,9 @@ void print_task(std::ostream& os, bthread_t tid) { has_tls = m->local_storage.keytable; cpuwide_start_ns = m->cpuwide_start_ns; stat = m->stat; + status = m->status; + traced = m->traced; + worker_tid = m->worker_tid; } } if (!matched) { @@ -969,7 +1013,13 @@ void print_task(std::ostream& os, bthread_t tid) { << "}\nhas_tls=" << has_tls << "\nuptime_ns=" << butil::cpuwide_time_ns() - cpuwide_start_ns << "\ncputime_ns=" << stat.cputime_ns - << "\nnswitch=" << stat.nswitch; + << "\nnswitch=" << stat.nswitch +#ifdef BRPC_BTHREAD_TRACER + << "\nstatus=" << status + << "\ntraced=" << traced + << "\nworker_tid=" << worker_tid; +#endif // BRPC_BTHREAD_TRACER + ; } } diff --git a/src/bthread/task_group.h b/src/bthread/task_group.h index a19bd023f7..d68c0dbb67 100644 --- a/src/bthread/task_group.h +++ 
b/src/bthread/task_group.h @@ -83,7 +83,7 @@ class TaskGroup { // then being popped by sched(pg), which is not necessary. static void sched_to(TaskGroup** pg, TaskMeta* next_meta); static void sched_to(TaskGroup** pg, bthread_t next_tid); - static void exchange(TaskGroup** pg, bthread_t next_tid); + static void exchange(TaskGroup** pg, TaskMeta* next_meta); // The callback will be run in the beginning of next-run bthread. // Can't be called by current bthread directly because it often needs @@ -151,18 +151,18 @@ class TaskGroup { int64_t cumulated_cputime_ns() const { return _cumulated_cputime_ns; } // Push a bthread into the runqueue - void ready_to_run(bthread_t tid, bool nosignal = false); + void ready_to_run(TaskMeta* meta, bool nosignal = false); // Flush tasks pushed to rq but signalled. void flush_nosignal_tasks(); // Push a bthread into the runqueue from another non-worker thread. - void ready_to_run_remote(bthread_t tid, bool nosignal = false); + void ready_to_run_remote(TaskMeta* meta, bool nosignal = false); void flush_nosignal_tasks_remote_locked(butil::Mutex& locked_mutex); void flush_nosignal_tasks_remote(); // Automatically decide the caller is remote or local, and call // the corresponding function. - void ready_to_run_general(bthread_t tid, bool nosignal = false); + void ready_to_run_general(TaskMeta* meta, bool nosignal = false); void flush_nosignal_tasks_general(); // The TaskControl that this TaskGroup belongs to. @@ -189,6 +189,8 @@ class TaskGroup { bthread_tag_t tag() const { return _tag; } + pid_t tid() const { return _tid; } + private: friend class TaskControl; @@ -197,7 +199,7 @@ friend class TaskControl; int init(size_t runqueue_capacity); - // You shall call destroy_self() instead of destructor because deletion + // You shall call destroy_selfm() instead of destructor because deletion // of groups are postponed to avoid race. 
~TaskGroup(); @@ -207,7 +209,7 @@ friend class TaskControl; static void _release_last_context(void*); static void _add_sleep_event(void*); struct ReadyToRunArgs { - bthread_t tid; + TaskMeta* meta; bool nosignal; }; static void ready_to_run_in_worker(void*); @@ -262,6 +264,9 @@ friend class TaskControl; int _sched_recursive_guard; // tag of this taskgroup bthread_tag_t _tag; + + // Worker thread id. + pid_t _tid; }; } // namespace bthread diff --git a/src/bthread/task_group_inl.h b/src/bthread/task_group_inl.h index 45626ceb49..75c377e12f 100644 --- a/src/bthread/task_group_inl.h +++ b/src/bthread/task_group_inl.h @@ -46,17 +46,17 @@ inline TaskMeta* TaskGroup::address_meta(bthread_t tid) { return address_resource(get_slot(tid)); } -inline void TaskGroup::exchange(TaskGroup** pg, bthread_t next_tid) { +inline void TaskGroup::exchange(TaskGroup** pg, TaskMeta* next_meta) { TaskGroup* g = *pg; if (g->is_current_pthread_task()) { - return g->ready_to_run(next_tid); + return g->ready_to_run(next_meta); } - ReadyToRunArgs args = { g->current_tid(), false }; + ReadyToRunArgs args = { g->_cur_meta, false }; g->set_remained((g->current_task()->about_to_quit ? 
ready_to_run_in_worker_ignoresignal : ready_to_run_in_worker), &args); - TaskGroup::sched_to(pg, next_tid); + TaskGroup::sched_to(pg, next_meta); } inline void TaskGroup::sched_to(TaskGroup** pg, bthread_t next_tid) { diff --git a/src/bthread/task_meta.h b/src/bthread/task_meta.h index 5e5c4541d1..0367787f86 100644 --- a/src/bthread/task_meta.h +++ b/src/bthread/task_meta.h @@ -27,6 +27,7 @@ #include "butil/atomicops.h" // butil::atomic #include "bthread/types.h" // bthread_attr_t #include "bthread/stack.h" // ContextualStack +#include "bthread/timer_thread.h" namespace bthread { @@ -48,59 +49,75 @@ struct LocalStorage { const static LocalStorage LOCAL_STORAGE_INIT = BTHREAD_LOCAL_STORAGE_INITIALIZER; +enum TaskStatus { + TASK_STATUS_UNKNOWN, + TASK_STATUS_CREATED, + TASK_STATUS_FIRST_READY, + TASK_STATUS_READY, + TASK_STATUS_JUMPING, + TASK_STATUS_RUNNING, + TASK_STATUS_SUSPENDED, + TASK_STATUS_END, +}; + struct TaskMeta { // [Not Reset] - butil::atomic current_waiter; - uint64_t current_sleep; + butil::atomic current_waiter{NULL}; + uint64_t current_sleep{TimerThread::INVALID_TASK_ID}; // A flag to mark if the Timer scheduling failed. - bool sleep_failed; + bool sleep_failed{false}; // A builtin flag to mark if the thread is stopping. - bool stop; + bool stop{false}; // The thread is interrupted and should wake up from some blocking ops. - bool interrupted; + bool interrupted{false}; // Scheduling of the thread can be delayed. - bool about_to_quit; + bool about_to_quit{false}; // [Not Reset] guarantee visibility of version_butex. - pthread_spinlock_t version_lock; + pthread_spinlock_t version_lock{}; // [Not Reset] only modified by one bthread at any time, no need to be atomic - uint32_t* version_butex; + uint32_t* version_butex{NULL}; // The identifier. It does not have to be here, however many code is // simplified if they can get tid from TaskMeta. 
- bthread_t tid; + bthread_t tid{INVALID_BTHREAD}; // User function and argument - void* (*fn)(void*); - void* arg; + void* (*fn)(void*){NULL}; + void* arg{NULL}; // Stack of this task. - ContextualStack* stack; + ContextualStack* stack{NULL}; // Attributes creating this task - bthread_attr_t attr; + bthread_attr_t attr{BTHREAD_ATTR_NORMAL}; // Statistics - int64_t cpuwide_start_ns; - TaskStatistics stat; + int64_t cpuwide_start_ns{0}; + TaskStatistics stat{}; // bthread local storage, sync with tls_bls (defined in task_group.cpp) // when the bthread is created or destroyed. // DO NOT use this field directly, use tls_bls instead. - LocalStorage local_storage; + LocalStorage local_storage{}; + + // Only used when TaskTracer is enabled. + // Bthread status. + TaskStatus status{TASK_STATUS_UNKNOWN}; + // Whether bthread is traced? + bool traced{false}; + // Worker thread id. + pid_t worker_tid{-1}; public: // Only initialize [Not Reset] fields, other fields will be reset in // bthread_start* functions - TaskMeta() - : current_waiter(NULL) - , current_sleep(0) - , stack(NULL) { + TaskMeta() { pthread_spin_init(&version_lock, 0); version_butex = butex_create_checked(); *version_butex = 1; diff --git a/src/bthread/task_tracer.cpp b/src/bthread/task_tracer.cpp new file mode 100644 index 0000000000..ed786d8df0 --- /dev/null +++ b/src/bthread/task_tracer.cpp @@ -0,0 +1,491 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifdef BRPC_BTHREAD_TRACER + +#include "bthread/task_tracer.h" +#include "butil/debug/stack_trace.h" +#include "butil/memory/scope_guard.h" +#include "bthread/task_group.h" +#include "bthread/processor.h" + +namespace bthread { + +DEFINE_bool(enable_fast_unwind, true, "Whether to enable fast unwind"); +DEFINE_uint32(signal_trace_timeout_ms, 50, "Timeout for signal trace in ms"); + +int TaskTracer::Init() { + if (RegisterSignalHandler() != 0) { + return -1; + } + if (_trace_time.expose("bthread_trace_time") != 0) { + return -1; + } + if (_unwind_time.expose("bthread_unwind_time") != 0) { + return -1; + } + if (_signal_handler_time.expose("bthread_signal_handler_time") != 0) { + return -1; + } + return 0; +} + +void TaskTracer::set_status(TaskStatus s, TaskMeta* m) { + CHECK_NE(TASK_STATUS_RUNNING, s) << "Use `set_running_status' instead"; + CHECK_NE(TASK_STATUS_END, s) << "Use `set_end_status_unsafe' instead"; + + bool tracing; + { + BAIDU_SCOPED_LOCK(m->version_lock); + if (TASK_STATUS_UNKNOWN == m->status && TASK_STATUS_JUMPING == s) { + // Do not update status for jumping when bthread is ending. + return; + } + + tracing = m->traced; + // bthread is scheduled for the first time. + if (TASK_STATUS_READY == s || NULL == m->stack) { + m->status = TASK_STATUS_FIRST_READY; + } else { + m->status = s; + } + if (TASK_STATUS_CREATED == s) { + m->worker_tid = -1; + } + } + + // Make sure bthread does not jump stack when it is being traced. 
+ if (tracing && TASK_STATUS_JUMPING == s) { + WaitForTracing(m); + } +} + +void TaskTracer::set_running_status(pid_t worker_tid, TaskMeta* m) { + BAIDU_SCOPED_LOCK(m->version_lock); + m->worker_tid = worker_tid; + m->status = TASK_STATUS_RUNNING; +} + +bool TaskTracer::set_end_status_unsafe(TaskMeta* m) { + m->status = TASK_STATUS_END; + return m->traced; +} + +std::string TaskTracer::Trace(bthread_t tid) { + Result result = TraceImpl(tid); + return result.error ? result.err_msg : ToString(result); +} + +void TaskTracer::Trace(std::ostream& os, bthread_t tid) { + Result result = TraceImpl(tid); + if (result.error) { + os << result.err_msg; + } else { + ToString(result, os); + } +} + +TaskTracer::Result TaskTracer::TraceImpl(bthread_t tid) { + butil::Timer timer(butil::Timer::STARTED); + BRPC_SCOPE_EXIT { + timer.stop(); + _trace_time << timer.n_elapsed(); + LOG(INFO) << "Trace time: " << timer.u_elapsed(); + }; + + if (tid == bthread_self()) { + return Result::MakeErrorResult("Can not trace self=%d", tid); + } + + // Make sure only one bthread is traced at a time. + BAIDU_SCOPED_LOCK(_trace_request_mutex); + + TaskMeta* m = TaskGroup::address_meta(tid); + if (NULL == m) { + return Result::MakeErrorResult("bthread=%d never existed", tid); + } + + BAIDU_SCOPED_LOCK(_mutex); + TaskStatus status; + pid_t worker_tid; + const uint32_t given_version = get_version(tid); + { + BAIDU_SCOPED_LOCK(m->version_lock); + if (given_version == *m->version_butex) { + // Start tracing. 
+ m->traced = true; + worker_tid = m->worker_tid; + status = m->status; + } else { + return Result::MakeErrorResult("bthread=%d not exist now", tid); + } + } + + if (TASK_STATUS_UNKNOWN == status) { + return Result::MakeErrorResult("bthread=%d not exist now", tid); + } else if (TASK_STATUS_CREATED == status) { + return Result::MakeErrorResult("bthread=%d has just been created", tid); + } else if (TASK_STATUS_FIRST_READY == status) { + return Result::MakeErrorResult("bthread=%d is scheduled for the first time", tid); + } else if (TASK_STATUS_END == status) { + return Result::MakeErrorResult("bthread=%d has ended", tid); + } else if (TASK_STATUS_JUMPING == status) { + // Wait for jumping completion. + status = WaitForJumping(m); + } + + // After jumping, the status may be RUNNING, SUSPENDED, or READY, which is traceable. + + Result result{}; + if (TASK_STATUS_RUNNING == status) { + result = SignalTrace(worker_tid); + } else if (TASK_STATUS_SUSPENDED == status || TASK_STATUS_READY == status) { + result = ContextTrace(m->stack->context); + } + + { + BAIDU_SCOPED_LOCK(m->version_lock); + // If m->status is BTHREAD_STATUS_END, the bthread also waits for tracing completion, + // so given_version != *m->version_butex is OK. + m->traced = false; + } + // Wake up the waiting worker thread to jump. + _cond.Signal(); + + return result; +} + +void TaskTracer::SignalSafeUsleep(unsigned int microseconds) { + ErrnoGuard guard; + struct timespec sleep_time{}; + sleep_time.tv_sec = microseconds / 1000000; + sleep_time.tv_nsec = (microseconds % 1000000) * 1000; + // On Linux, sleep() is implemented via nanosleep(2). + // sleep() is async-signal-safety, so nanosleep() is considered as async-signal-safety on Linux. + // abseil-cpp and folly also use nanosleep() in signal handler. For details, see: + // 1. abseil-cpp: https://github.com/abseil/abseil-cpp/blob/27a0c7308f04e4560fabe5a7beca837e8f3f2c5b/absl/debugging/failure_signal_handler.cc#L314 + // 2. 
folly: https://github.com/facebook/folly/blob/479de0144d3acb6aa4b3483affa23cf4f49f07ee/folly/debugging/symbolizer/SignalHandler.cpp#L446 + while (nanosleep(&sleep_time, &sleep_time) == -1 && EINTR == errno); +} + +void TaskTracer::WaitForTracing(TaskMeta* m) { + BAIDU_SCOPED_LOCK(_mutex); + while (m->traced) { + _cond.Wait(); + } +} + +TaskStatus TaskTracer::WaitForJumping(TaskMeta* m) { + // Reasons for not using locks here: + // 1. It is necessary to lock before jump_stack, unlock after jump_stack, + // which involves two different bthread and is prone to errors. + // 2. jump_stack is fast. + int i = 0; + do { + // The bthread is jumping now, spin until it finishes. + if (i++ < 30) { + cpu_relax(); + } else { + sched_yield(); + } + + BAIDU_SCOPED_LOCK(m->version_lock); + if (TASK_STATUS_JUMPING != m->status) { + return m->status; + } + } while (true); +} + +TaskTracer::Result TaskTracer::ContextTrace(bthread_fcontext_t fcontext) { + unw_cursor_t cursor = MakeCursor(fcontext); + return TraceCore(cursor); +} + +int TaskTracer::RegisterSignalHandler() { + // Set up the signal handler. + struct sigaction old_sa{}; + struct sigaction sa{}; + sa.sa_sigaction = SignalHandler; + sa.sa_flags = SA_SIGINFO; + sigfillset(&sa.sa_mask); + if (sigaction(SIGURG, &sa, &old_sa) == -1) { + PLOG(ERROR) << "Failed to sigaction"; + return -1; + } + if (NULL != old_sa.sa_handler || NULL != old_sa.sa_sigaction) { + LOG(ERROR) << "SIGURG is already registered"; + return -1; + } + + return 0; +} + +void TaskTracer::SignalHandler(int, siginfo_t* info, void* context) { + ErrnoGuard guard; + TaskTracer* tracer = static_cast(info->si_value.sival_ptr); + if (NULL == tracer) { + // The signal is not from Tracer, such as TaskControl, do nothing. + return; + } + tracer->SignalTraceHandler(static_cast(context)); +} + +// Caution: This function is called in signal handler, so it should be async-signal-safety. 
+void TaskTracer::SignalTraceHandler(unw_context_t* context) { + // Something wrong, signal trace is not started, do nothing. + if (SIGNAL_TRACE_STATUS_START != + _signal_handler_flag.load(butil::memory_order_acquire)) { + return; + } + + butil::Timer timer(butil::Timer::STARTED); + BRPC_SCOPE_EXIT { + timer.stop(); + _signal_handler_time << timer.n_elapsed(); + }; + + _signal_handler_context = context; + // Use memory_order_seq_cst to ensure the flag is set before loop. + _signal_handler_flag.store(SIGNAL_TRACE_STATUS_TRACING, butil::memory_order_seq_cst); + while (SIGNAL_TRACE_STATUS_TRACING == + _signal_handler_flag.load(butil::memory_order_seq_cst)) { + SignalSafeUsleep(50); // 50us + // Timeout to avoid deadlock of libunwind. + timer.stop(); + if (timer.m_elapsed() > FLAGS_signal_trace_timeout_ms) { + break; + } + } +} + +TaskTracer::Result TaskTracer::SignalTrace(pid_t tid) { + _signal_handler_context = NULL; + // Use memory_order_seq_cst to ensure the flag is set before sending signal. + _signal_handler_flag.store(SIGNAL_TRACE_STATUS_START, butil::memory_order_seq_cst); + + // CAUTION: + // The signal handler will wait for the backtrace to complete. + // If the worker thread is interrupted when holding a resource(lock, etc), + // and this function waits for the resource during capturing backtraces, + // it may cause a deadlock. + // + // https://github.com/gperftools/gperftools/wiki/gperftools'-stacktrace-capturing-methods-and-their-issues#libunwind + // Generally, libunwind promises async-signal-safety for capturing backtraces. + // But in practice, it is only partially async-signal-safe due to reliance on + // dl_iterate_phdr API, which is used to enumerate all loaded ELF modules + // (.so files and main executable binary). No libc offers dl_iterate_pdhr that + // is async-signal-safe. 
In practice, the issue may happen if we take tracing + // signal during an existing dl_iterate_phdr call (such as when the program + // throws an exception) or during dlopen/dlclose-ing some .so module. + // Deadlock call stack: + // #0 __lll_lock_wait (futex=futex@entry=0x7f0d3d7f0990 <_rtld_global+2352>, private=0) at lowlevellock.c:52 + // #1 0x00007f0d3a73c131 in __GI___pthread_mutex_lock (mutex=0x7f0d3d7f0990 <_rtld_global+2352>) at ../nptl/pthread_mutex_lock.c:115 + // #2 0x00007f0d38eb0231 in __GI___dl_iterate_phdr (callback=callback@entry=0x7f0d38c456a0 <_ULx86_64_dwarf_callback>, data=data@entry=0x7f0d07defad0) at dl-iteratephdr.c:40 + // #3 0x00007f0d38c45d79 in _ULx86_64_dwarf_find_proc_info (as=0x7f0d38c4f340 , ip=ip@entry=139694791966897, pi=pi@entry=0x7f0d07df0498, need_unwind_info=need_unwind_info@entry=1, arg=0x7f0 + // d07df0340) at dwarf/Gfind_proc_info-lsb.c:759 + // #4 0x00007f0d38c43260 in fetch_proc_info (c=c@entry=0x7f0d07df0340, ip=139694791966897) at dwarf/Gparser.c:461 + // #5 0x00007f0d38c44e46 in find_reg_state (sr=0x7f0d07defd10, c=0x7f0d07df0340) at dwarf/Gparser.c:925 + // #6 _ULx86_64_dwarf_step (c=c@entry=0x7f0d07df0340) at dwarf/Gparser.c:972 + // #7 0x00007f0d38c40c14 in _ULx86_64_step (cursor=cursor@entry=0x7f0d07df0340) at x86_64/Gstep.c:71 + // #8 0x00007f0d399ed8f6 in GetStackTraceWithContext_libunwind (result=, max_depth=63, skip_count=132054887, ucp=) at src/stacktrace_libunwind-inl.h:138 + // #9 0x00007f0d399ee083 in GetStackTraceWithContext (result=0x7f0d07df07b8, max_depth=63, skip_count=3, uc=0x7f0d07df0a40) at src/stacktrace.cc:305 + // #10 0x00007f0d399ea992 in CpuProfiler::prof_handler (signal_ucontext=, cpu_profiler=0x7f0d399f6600, sig=) at src/profiler.cc:359 + // #11 0x00007f0d399eb633 in ProfileHandler::SignalHandler (sig=27, sinfo=0x7f0d07df0b70, ucontext=0x7f0d07df0a40) at src/profile-handler.cc:530 + // #12 + // #13 0x00007f0d3a73c0b1 in __GI___pthread_mutex_lock (mutex=0x7f0d3d7f0990 <_rtld_global+2352>) at 
../nptl/pthread_mutex_lock.c:115 + // #14 0x00007f0d38eb0231 in __GI___dl_iterate_phdr (callback=0x7f0d38f525f0, data=0x7f0d07df10c0) at dl-iteratephdr.c:40 + // #15 0x00007f0d38f536c1 in _Unwind_Find_FDE () from /lib/x86_64-linux-gnu/libgcc_s.so.1 + // #16 0x00007f0d38f4f868 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1 + // #17 0x00007f0d38f50a20 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1 + // #18 0x00007f0d38f50f99 in _Unwind_RaiseException () from /lib/x86_64-linux-gnu/libgcc_s.so.1 + // #19 0x00007f0d390088dc in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6 + // #20 0x00007f0d3b5b2245 in __cxxabiv1::__cxa_throw (thrownException=0x7f0d114ea8c0, type=0x7f0d3d6dd830 , destructor=) at /src/folly/folly/experimental/exception_tracer/ExceptionTracerLib.cpp:107 + // + // Therefore, we do not capture backtracks in the signal handler to avoid mutex + // reentry and deadlock. Instead, we capture backtracks in this function and + // ends the signal handler after capturing backtraces is complete. + // Even so, there is still a deadlock problem: + // the worker thread is interrupted when during an existing dl_iterate_phdr call, + // and wait for the capturing backtraces to complete. This function capture + // backtracks with dl_iterate_phdr. We introduce a timeout mechanism in signal + // handler to avoid deadlock. + + union sigval value{}; + value.sival_ptr = this; + size_t sigqueue_try = 0; + while (sigqueue(tid, SIGURG, value) != 0) { + if (errno != EAGAIN || sigqueue_try++ >= 3) { + return Result::MakeErrorResult("Fail to sigqueue: %s", berror()); + } + } + + butil::Timer timer(butil::Timer::STARTED); + // Use memory_order_seq_cst to ensure the signal is sent and the flag is set before checking. 
+ for (int i = 0; + SIGNAL_TRACE_STATUS_START == _signal_handler_flag.load(butil::memory_order_seq_cst); + ++i) { + if (i < 30) { + sched_yield(); + } else { + SignalSafeUsleep(5); // 5us + } + + // Timeout to avoid dead loop if handler of SIGURG is covered. + timer.stop(); + if (timer.m_elapsed() > FLAGS_signal_trace_timeout_ms) { + return Result::MakeErrorResult( + "Timeout exceed %dms", FLAGS_signal_trace_timeout_ms); + } + } + + butil::Timer timer2(butil::Timer::STARTED); + unw_cursor_t cursor; + int rc = unw_init_local(&cursor, _signal_handler_context); + Result result; + if (0 == rc) { + result = TraceCore(cursor); + } + + // Use memory_order_seq_cst to ensure the flag is set after tracing. + _signal_handler_flag.store(SIGNAL_TRACE_STATUS_UNKNOWN, butil::memory_order_seq_cst); + timer2.stop(); + LOG(INFO) << "Signal trace time: " << timer2.u_elapsed(); + + return 0 == rc ? result : Result::MakeErrorResult("Failed to init local, rc=%d", rc); +} + +unw_cursor_t TaskTracer::MakeCursor(bthread_fcontext_t fcontext) { + unw_cursor_t cursor; + unw_init_local(&cursor, &_context); + auto regs = reinterpret_cast(fcontext); + + // Only need RBP, RIP, RSP. + // The base pointer (RBP). + if (unw_set_reg(&cursor, UNW_X86_64_RBP, regs[6]) != 0) { + LOG(ERROR) << "Fail to set RBP"; + } + // The instruction pointer (RIP). + if (unw_set_reg(&cursor, UNW_REG_IP, regs[7]) != 0) { + LOG(ERROR) << "Fail to set RIP"; + } +#if UNW_VERSION_MAJOR >= 1 && UNW_VERSION_MINOR >= 7 + // The stack pointer (RSP). 
+ if (unw_set_reg(&cursor, UNW_REG_SP, regs[8]) != 0) { + LOG(ERROR) << "Fail to set RSP"; + } +#endif + + return cursor; +} + +TaskTracer::Result TaskTracer::TraceCore(unw_cursor_t& cursor) { + butil::Timer timer(butil::Timer::STARTED); + BRPC_SCOPE_EXIT { + timer.stop(); + _unwind_time << timer.n_elapsed(); + }; + + Result result{}; + result.fast_unwind = FLAGS_enable_fast_unwind; + for (result.frame_count = 0; result.frame_count < arraysize(result.ips); ++result.frame_count) { + int rc = unw_step(&cursor); + if (0 == rc) { + break; + } else if (rc < 0) { + return Result::MakeErrorResult("unw_step rc=%d", rc); + } + + unw_word_t ip = 0; + // Fast unwind does not care about the return value. + rc = unw_get_reg(&cursor, UNW_REG_IP, &ip); + result.ips[result.frame_count] = ip; + + if (!result.fast_unwind) { + continue; + } + + if (0 != rc) { + butil::strings::SafeSPrintf(result.mangled[result.frame_count], "\0"); + continue; + } + + // Slow path. + rc = unw_get_proc_name( + &cursor, result.mangled[result.frame_count], sizeof(result.mangled[result.frame_count]), NULL); + // UNW_ENOMEM is ok. 
+ if (0 != rc && UNW_ENOMEM != rc) { + butil::strings::SafeSPrintf(result.mangled[result.frame_count], "\0"); + } + } + + return result; +} + +std::string TaskTracer::ToString(const Result& result) { + if (result.frame_count == 0) { + return "No frame"; + } + + if (!result.fast_unwind) { + butil::debug::StackTrace stack_trace((void**)&result.ips, result.frame_count); + return stack_trace.ToString(); + } + + std::string result_str; + result_str.reserve(1024); + for (size_t i = 0; i < result.frame_count; ++i) { + butil::string_appendf(&result_str, "#%zu 0x%016lx ", i, result.ips[i]); + if (strlen(result.mangled[i]) == 0) { + result_str.append(""); + } else { + result_str.append(butil::demangle(result.mangled[i])); + } + if (i + 1 < result.frame_count) { + result_str.push_back('\n'); + } + } + return result_str; +} + +void TaskTracer::ToString(const Result& result, std::ostream& out_os) { + if (result.frame_count == 0) { + out_os << "No frame"; + return; + } + + if (!result.fast_unwind) { + butil::debug::StackTrace stack_trace((void**)&result.ips, result.frame_count); + stack_trace.OutputToStream(&out_os); + return; + } + + for (size_t i = 0; i < result.frame_count; ++i) { + out_os << "# " << i << " 0x" << std::hex << result.ips[i] << std::dec << " "; + if (strlen(result.mangled[i]) == 0) { + out_os << ""; + } else { + out_os << butil::demangle(result.mangled[i]); + } + if (i + 1 < result.frame_count) { + out_os << '\n'; + } + } +} + +} // namespace bthread + +#endif // BRPC_BTHREAD_TRACER diff --git a/src/bthread/task_tracer.h b/src/bthread/task_tracer.h new file mode 100644 index 0000000000..48c54ac798 --- /dev/null +++ b/src/bthread/task_tracer.h @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef BRPC_BTHREAD_TRACER_H +#define BRPC_BTHREAD_TRACER_H + +#ifdef BRPC_BTHREAD_TRACER + +#include +#include +#include +#include "butil/strings/safe_sprintf.h" +#include "butil/synchronization/condition_variable.h" +#include "bthread/task_meta.h" +#include "bthread/mutex.h" + +namespace bthread { + +// Tracer for bthread. +class TaskTracer { +public: + int Init(); + // Set the status to `s'. + void set_status(TaskStatus s, TaskMeta* meta); + static void set_running_status(pid_t worker_tid, TaskMeta* meta); + static bool set_end_status_unsafe(TaskMeta* m); + + // Async signal safe usleep. + static void SignalSafeUsleep(unsigned int microseconds); + + // Trace the bthread of `tid'. + std::string Trace(bthread_t tid); + void Trace(std::ostream& os, bthread_t tid); + + // When the worker is jumping stack from a bthread to another, + void WaitForTracing(TaskMeta* m); + +private: + // Error number guard used in signal handler. + class ErrnoGuard { + public: + ErrnoGuard() : _errno(errno) {} + ~ErrnoGuard() { errno = _errno; } + private: + int _errno; + }; + + enum SignalTraceStatus { + SIGNAL_TRACE_STATUS_UNKNOWN = 0, + SIGNAL_TRACE_STATUS_START, + SIGNAL_TRACE_STATUS_TRACING, + }; + + struct Result { + template + static Result MakeErrorResult(const char* fmt, Args... 
args) { + Result result{}; + result.error = true; + butil::strings::SafeSPrintf(result.err_msg, fmt, args...); + return result; + } + + static const size_t MAX_TRACE_NUM = 64; + unw_word_t ips[MAX_TRACE_NUM]; + size_t frame_count{0}; + bool error{false}; + union { + char mangled[MAX_TRACE_NUM][256]{}; + char err_msg[256]; + }; + + bool fast_unwind{false}; + }; + + Result TraceImpl(bthread_t tid); + + static TaskStatus WaitForJumping(TaskMeta* m); + + Result ContextTrace(bthread_fcontext_t fcontext); + + // Register signal handler for signal trace. + static int RegisterSignalHandler(); + static void SignalHandler(int sig, siginfo_t* info, void* context); + void SignalTraceHandler(unw_context_t* context); + Result SignalTrace(pid_t worker_tid); + + unw_cursor_t MakeCursor(bthread_fcontext_t fcontext); + Result TraceCore(unw_cursor_t& cursor); + + static std::string ToString(const Result& result); + static void ToString(const Result& result, std::ostream& out_os); + + // Make sure only one bthread is traced at a time. + bthread::Mutex _trace_request_mutex; + + // For signal trace. + // Make sure bthread does not jump stack when it is being traced. + butil::Mutex _mutex; + butil::ConditionVariable _cond{&_mutex}; + + // For context trace. + unw_context_t _context{}; + // For signal trace. + unw_context_t* _signal_handler_context{NULL}; + butil::atomic _signal_handler_flag{SIGNAL_TRACE_STATUS_UNKNOWN}; + + // Protect `_worker_tids'. 
+ butil::Mutex _worker_mutex; + std::vector _worker_tids; + + bvar::LatencyRecorder _trace_time{"bthread_trace_time"}; + bvar::LatencyRecorder _unwind_time{"bthread_unwind_time"}; + bvar::LatencyRecorder _signal_handler_time{"bthread_signal_handler_time"}; +}; + +} // namespace bthread + +#endif // BRPC_BTHREAD_TRACER + +#endif // BRPC_BTHREAD_TRACER_H diff --git a/src/butil/debug/stack_trace.cc b/src/butil/debug/stack_trace.cc index 38abede980..97a4cd7683 100644 --- a/src/butil/debug/stack_trace.cc +++ b/src/butil/debug/stack_trace.cc @@ -29,17 +29,18 @@ const void *const *StackTrace::Addresses(size_t* count) const { } size_t StackTrace::CopyAddressTo(void** buffer, size_t max_nframes) const { - size_t nframes = std::min(count_, max_nframes); - memcpy(buffer, trace_, nframes * sizeof(void*)); - return nframes; + size_t nframes = std::min(count_, max_nframes); + memcpy(buffer, trace_, nframes * sizeof(void*)); + return nframes; } std::string StackTrace::ToString() const { - std::stringstream stream; + std::string str; + str.reserve(1024); #if !defined(__UCLIBC__) - OutputToStream(&stream); + OutputToString(str); #endif - return stream.str(); + return str; } } // namespace debug diff --git a/src/butil/debug/stack_trace.h b/src/butil/debug/stack_trace.h index 5c6545add4..e812058321 100644 --- a/src/butil/debug/stack_trace.h +++ b/src/butil/debug/stack_trace.h @@ -79,6 +79,7 @@ class BUTIL_EXPORT StackTrace { #if !defined(__UCLIBC__) // Resolves backtrace to symbols and write to stream. void OutputToStream(std::ostream* os) const; + void OutputToString(std::string& str) const; #endif // Resolves backtrace to symbols and returns as string. 
diff --git a/src/butil/debug/stack_trace_posix.cc b/src/butil/debug/stack_trace_posix.cc index 878f94a7c8..9ef91c2f0b 100644 --- a/src/butil/debug/stack_trace_posix.cc +++ b/src/butil/debug/stack_trace_posix.cc @@ -429,6 +429,23 @@ class StreamBacktraceOutputHandler : public BacktraceOutputHandler { DISALLOW_COPY_AND_ASSIGN(StreamBacktraceOutputHandler); }; +class StringBacktraceOutputHandler : public BacktraceOutputHandler { +public: + explicit StringBacktraceOutputHandler(std::string& str) : _str(str) {} + + DISALLOW_COPY_AND_ASSIGN(StringBacktraceOutputHandler); + + void HandleOutput(const char* output) OVERRIDE { + if (NULL == output) { + return; + } + _str.append(output); + } + +private: + std::string& _str; +}; + void WarmUpBacktrace() { // Warm up stack trace infrastructure. It turns out that on the first // call glibc initializes some internal data structures using pthread_once, @@ -801,6 +818,11 @@ void StackTrace::OutputToStream(std::ostream* os) const { StreamBacktraceOutputHandler handler(os); ProcessBacktrace(trace_, count_, &handler); } + +void StackTrace::OutputToString(std::string& str) const { + StringBacktraceOutputHandler handler(str); + ProcessBacktrace(trace_, count_, &handler); +} #endif namespace internal { diff --git a/test/brpc_builtin_service_unittest.cpp b/test/brpc_builtin_service_unittest.cpp index 178c3f9e5a..8ea17a471b 100644 --- a/test/brpc_builtin_service_unittest.cpp +++ b/test/brpc_builtin_service_unittest.cpp @@ -837,6 +837,17 @@ void* dummy_bthread(void*) { return NULL; } + +#ifdef BRPC_BTHREAD_TRACER +bool g_bthread_trace_stop = false; +void* bthread_trace(void*) { + while (!g_bthread_trace_stop) { + bthread_usleep(1000 * 100); + } + return NULL; +} +#endif // BRPC_BTHREAD_TRACER + TEST_F(BuiltinServiceTest, bthreads) { brpc::BthreadsService service; brpc::BthreadsRequest req; @@ -867,7 +878,25 @@ TEST_F(BuiltinServiceTest, bthreads) { service.default_method(&cntl, &req, &res, &done); EXPECT_FALSE(cntl.Failed()); 
CheckContent(cntl, "stop=0"); - } + } + +#ifdef BRPC_BTHREAD_TRACER + { + bthread_t th; + EXPECT_EQ(0, bthread_start_background(&th, NULL, bthread_trace, NULL)); + ClosureChecker done; + brpc::Controller cntl; + std::string id_string; + butil::string_printf(&id_string, "%llu?st=1", (unsigned long long)th); + cntl.http_request().uri().SetHttpURL("/bthreads/" + id_string); + cntl.http_request()._unresolved_path = id_string; + service.default_method(&cntl, &req, &res, &done); + g_bthread_trace_stop = true; + EXPECT_FALSE(cntl.Failed()); + CheckContent(cntl, "stop=0"); + CheckContent(cntl, "bthread_trace"); + } +#endif // BRPC_BTHREAD_TRACER } TEST_F(BuiltinServiceTest, sockets) { diff --git a/test/bthread_unittest.cpp b/test/bthread_unittest.cpp index 5ed8aba897..0283875eb6 100644 --- a/test/bthread_unittest.cpp +++ b/test/bthread_unittest.cpp @@ -20,14 +20,23 @@ #include "butil/time.h" #include "butil/macros.h" #include "butil/logging.h" -#include "butil/logging.h" #include "butil/gperftools_profiler.h" #include "bthread/bthread.h" #include "bthread/unstable.h" #include "bthread/task_meta.h" +int main(int argc, char* argv[]) { + testing::InitGoogleTest(&argc, argv); + GFLAGS_NS::ParseCommandLineFlags(&argc, &argv, true); + int rc = RUN_ALL_TESTS(); + return rc; +} + namespace bthread { - extern __thread bthread::LocalStorage tls_bls; +extern __thread bthread::LocalStorage tls_bls; +#ifdef BRPC_BTHREAD_TRACER +extern std::string stack_trace(bthread_t tid); +#endif // BRPC_BTHREAD_TRACER } namespace { @@ -608,4 +617,32 @@ TEST_F(BthreadTest, yield_single_thread) { ASSERT_EQ(0, bthread_join(tid, NULL)); } +#ifdef BRPC_BTHREAD_TRACER +TEST_F(BthreadTest, trace) { + stop = false; + bthread_t th; + ASSERT_EQ(0, bthread_start_urgent(&th, NULL, spin_and_log, (void*)1)); + usleep(100 * 1000); + std::string st = bthread::stack_trace(th); + LOG(INFO) << "spin_and_log stack trace:\n" << st; + ASSERT_NE(std::string::npos, st.find("spin_and_log")); + stop = true; + ASSERT_EQ(0, 
bthread_join(th, NULL)); + + stop = false; + ASSERT_EQ(0, bthread_start_urgent(&th, NULL, repeated_sleep, (void*)1)); + usleep(100 * 1000); + st = bthread::stack_trace(th); + LOG(INFO) << "repeated_sleep stack trace:\n" << st; + ASSERT_NE(std::string::npos, st.find("repeated_sleep")); + stop = true; + ASSERT_EQ(0, bthread_join(th, NULL)); + + st = bthread::stack_trace(th); + LOG(INFO) << "ended bthread stack trace:\n" << st; + ASSERT_NE(std::string::npos, st.find("not exist now")); + +} +#endif // BRPC_BTHREAD_TRACER + } // namespace