Skip to content

Commit

Permalink
enable brpc use rdma
Browse files Browse the repository at this point in the history
  • Loading branch information
Tuvie committed Jul 19, 2022
1 parent 41c80c7 commit 7d38c8f
Show file tree
Hide file tree
Showing 38 changed files with 6,508 additions and 136 deletions.
14 changes: 14 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ config_setting(
visibility = ["//visibility:public"],
)

# Matches builds invoked with `--define with_rdma=true`. The COPTS and
# LINKOPTS select() branches keyed on ":with_rdma" use it to add
# -DBRPC_WITH_RDMA=1 and -libverbs.
config_setting(
name = "with_rdma",
define_values = {"with_rdma": "true"},
visibility = ["//visibility:public"],
)

config_setting(
name = "unittest",
define_values = {"unittest": "true"},
Expand Down Expand Up @@ -73,6 +79,9 @@ COPTS = [
}) + select({
":with_thrift": ["-DENABLE_THRIFT_FRAMED_PROTOCOL=1"],
"//conditions:default": [""],
}) + select({
":with_rdma": ["-DBRPC_WITH_RDMA=1"],
"//conditions:default": [""],
})

LINKOPTS = [
Expand Down Expand Up @@ -105,6 +114,11 @@ LINKOPTS = [
"-levent",
"-lthrift"],
"//conditions:default": [],
}) + select({
":with_rdma": [
"-libverbs",
],
"//conditions:default": [],
})

genrule(
Expand Down
22 changes: 21 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ option(DEBUG "Print debug logs" OFF)
option(WITH_DEBUG_SYMBOLS "With debug symbols" ON)
option(WITH_THRIFT "With thrift framed protocol supported" OFF)
option(WITH_SNAPPY "With snappy" OFF)
option(WITH_RDMA "With RDMA" OFF)
option(BUILD_UNIT_TESTS "Whether to build unit tests" OFF)
option(BUILD_BRPC_TOOLS "Whether to build brpc tools" ON)
option(DOWNLOAD_GTEST "Download and build a fresh copy of googletest. Requires Internet access." ON)
Expand Down Expand Up @@ -68,6 +69,12 @@ if(WITH_THRIFT)
set(THRIFT_LIB "thrift")
endif()

# Translate the WITH_RDMA option into the values consumed later:
#   WITH_RDMA_VAL  - "0"/"1", substituted into -DBRPC_WITH_RDMA=<val>
#   BRPC_WITH_RDMA - only defined when RDMA is enabled
if(WITH_RDMA)
    set(WITH_RDMA_VAL "1")
    set(BRPC_WITH_RDMA 1)
else()
    set(WITH_RDMA_VAL "0")
endif()

include(GNUInstallDirs)

configure_file(${PROJECT_SOURCE_DIR}/config.h.in ${PROJECT_SOURCE_DIR}/src/butil/config.h @ONLY)
Expand Down Expand Up @@ -106,7 +113,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -Wno-deprecated-declarations -Wno-inconsistent-missing-override")
endif()

set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DGFLAGS_NS=${GFLAGS_NS}")
set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} ${DEFINE_CLOCK_GETTIME} -DBRPC_WITH_GLOG=${WITH_GLOG_VAL} -DBRPC_WITH_RDMA=${WITH_RDMA_VAL} -DGFLAGS_NS=${GFLAGS_NS}")
if(WITH_MESALINK)
set(CMAKE_CPP_FLAGS "${CMAKE_CPP_FLAGS} -DUSE_MESALINK")
endif()
Expand Down Expand Up @@ -182,6 +189,15 @@ if(WITH_MESALINK)
include_directories(${MESALINK_INCLUDE_PATH})
endif()

# RDMA support needs both the verbs header and the ibverbs library; abort the
# configure step when either one is missing.
if(WITH_RDMA)
    message("brpc compile with rdma")
    find_path(RDMA_INCLUDE_PATH NAMES infiniband/verbs.h)
    find_library(RDMA_LIB NAMES ibverbs)
    if((NOT RDMA_INCLUDE_PATH) OR (NOT RDMA_LIB))
        message(FATAL_ERROR "Fail to find ibverbs")
    endif()
    # Make the located header directory visible to the compiler so that an
    # ibverbs installation outside the default include path still builds.
    include_directories(${RDMA_INCLUDE_PATH})
endif()

find_library(PROTOC_LIB NAMES protoc)
if(NOT PROTOC_LIB)
message(FATAL_ERROR "Fail to find protoc lib")
Expand Down Expand Up @@ -220,6 +236,10 @@ else()
list(APPEND DYNAMIC_LIB ${OPENSSL_SSL_LIBRARY})
endif()

# When RDMA is enabled, add the ibverbs library located by
# find_library(RDMA_LIB ...) to the list of dynamically linked libraries.
if(WITH_RDMA)
list(APPEND DYNAMIC_LIB ${RDMA_LIB})
endif()

set(BRPC_PRIVATE_LIBS "-lgflags -lprotobuf -lleveldb -lprotoc -lssl -lcrypto -ldl -lz")

if(WITH_GLOG)
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ JSON2PB_DIRS = src/json2pb
JSON2PB_SOURCES = $(foreach d,$(JSON2PB_DIRS),$(wildcard $(addprefix $(d)/*,$(SRCEXTS))))
JSON2PB_OBJS = $(addsuffix .o, $(basename $(JSON2PB_SOURCES)))

BRPC_DIRS = src/brpc src/brpc/details src/brpc/builtin src/brpc/policy
BRPC_DIRS = src/brpc src/brpc/details src/brpc/builtin src/brpc/policy src/brpc/rdma
THRIFT_SOURCES = $(foreach d,$(BRPC_DIRS),$(wildcard $(addprefix $(d)/thrift*,$(SRCEXTS))))
BRPC_SOURCES_ALL = $(foreach d,$(BRPC_DIRS),$(wildcard $(addprefix $(d)/*,$(SRCEXTS))))
BRPC_SOURCES = $(filter-out $(THRIFT_SOURCES), $(BRPC_SOURCES_ALL))
Expand Down
15 changes: 14 additions & 1 deletion config_brpc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ else
LDD=ldd
fi

TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-mesalink,nodebugsymbols -n 'config_brpc' -- "$@"`
TEMP=`getopt -o v: --long headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,nodebugsymbols -n 'config_brpc' -- "$@"`
WITH_GLOG=0
WITH_THRIFT=0
WITH_RDMA=0
WITH_MESALINK=0
DEBUGSYMBOLS=-g

Expand All @@ -64,6 +65,7 @@ while true; do
--cxx ) CXX=$2; shift 2 ;;
--with-glog ) WITH_GLOG=1; shift 1 ;;
--with-thrift) WITH_THRIFT=1; shift 1 ;;
--with-rdma) WITH_RDMA=1; shift 1 ;;
--with-mesalink) WITH_MESALINK=1; shift 1 ;;
--nodebugsymbols ) DEBUGSYMBOLS=; shift 1 ;;
-- ) shift; break ;;
Expand Down Expand Up @@ -346,6 +348,17 @@ if [ $WITH_THRIFT != 0 ]; then
fi
fi

# RDMA support (--with-rdma): locate libibverbs and infiniband/verbs.h,
# register their directories with the output lib/header lists, define
# BRPC_WITH_RDMA for the preprocessor and link with -libverbs.
# NOTE(review): the *_or_die helpers are presumably expected to abort the
# script when nothing is found -- confirm against their definitions.
if [ $WITH_RDMA != 0 ]; then
RDMA_LIB=$(find_dir_of_lib_or_die ibverbs)
RDMA_HDR=$(find_dir_of_header_or_die infiniband/verbs.h)
append_to_output_libs "$RDMA_LIB"
append_to_output_headers "$RDMA_HDR"

CPPFLAGS="${CPPFLAGS} -DBRPC_WITH_RDMA"

append_to_output "DYNAMIC_LINKINGS+=-libverbs"
fi

if [ $WITH_MESALINK != 0 ]; then
CPPFLAGS="${CPPFLAGS} -DUSE_MESALINK"
fi
Expand Down
50 changes: 50 additions & 0 deletions docs/cn/rdma.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# 编译

由于RDMA对驱动与硬件有要求,目前仅支持在Linux系统编译并运行RDMA功能。

使用config_brpc:
```bash
sh config_brpc.sh --with-rdma
make

cd example/rdma_performance # 示例程序
make
```

使用cmake:
```bash
mkdir bld && cd bld && cmake -DWITH_RDMA=ON ..
make

cd ../example/rdma_performance # 示例程序
mkdir bld && cd bld && cmake ..
make
```

# 基本实现

brpc内部使用RDMA RC模式,每个Socket对应一个QP。RDMA连接建立依赖于前置TCP建连,TCP建连后双方交换必要参数(含GID、QPN等),再发起RDMA连接并实现数据传输。建连用的TCP连接在RDMA连接活跃期间并不传输数据,但仍保持EST状态。一旦TCP连接中断,其上对应的RDMA连接同样会置错。

RDMA要求数据收发所使用的内存空间必须被注册(memory register),这一操作非常耗时,所以通常都会使用内存池方案来加速。brpc内部的数据收发都使用IOBuf,为了在兼容IOBuf的情况下实现完全零拷贝,整个IOBuf所使用的内存空间整体由统一内存池接管。注意,由于IOBuf内存池不由用户直接控制,因此实际使用中需要注意IOBuf所消耗的总内存,建议根据实际业务需求,一次性注册足够的内存池以实现性能最大化。

# 参数

可配置参数说明:
* rdma_trace_verbose: 日志中打印RDMA建连相关信息,默认false
* rdma_recv_zerocopy: 是否启用接收零拷贝,默认true
* rdma_zerocopy_min_size: 接收零拷贝最小的msg大小,默认512B
* rdma_recv_block_type: 为接收数据预准备的block类型,分为三类default(8KB)/large(64KB)/huge(2MB),默认为default
* rdma_prepared_qp_size: 程序启动预生成的QP的大小,默认128
* rdma_prepared_qp_cnt: 程序启动预生成的QP的数量,默认1024
* rdma_max_sge: 允许的最大发送SGList长度,默认为0,即采用硬件所支持的最大长度
* rdma_sq_size: SQ大小,默认128
* rdma_rq_size: RQ大小,默认128
* rdma_cqe_poll_once: 从CQ中一次性poll出的CQE数量,默认32
* rdma_gid_index: 使用本地GID表中的Index,默认为-1,即选用最大的可用GID Index
* rdma_port: 使用IB设备的port number,默认为1
* rdma_device: 使用IB设备的名称,默认为空,即使用第一个active的设备
* rdma_memory_pool_initial_size_mb: 内存池的初始大小,单位MB,默认1024
* rdma_memory_pool_increase_size_mb: 内存池每次动态增长的大小,单位MB,默认1024
* rdma_memory_pool_max_regions: 最大的内存池块数,默认16
* rdma_memory_pool_buckets: 内存池中为避免竞争采用的bucket数目,默认为4
* rdma_memory_pool_tls_cache_num: 内存池中thread local的缓存block数目,默认为128
50 changes: 50 additions & 0 deletions docs/en/rdma.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Build

Since RDMA requires driver and hardware support, only the build on Linux has been verified.

With config_brpc:
```bash
sh config_brpc.sh --with-rdma
make

cd example/rdma_performance # example for rdma
make
```

With cmake:
```bash
mkdir bld && cd bld && cmake -DWITH_RDMA=ON ..
make

cd ../example/rdma_performance # example for rdma
mkdir bld && cd bld && cmake ..
make
```

# Basic Implementation

brpc uses RDMA RC mode. Every Socket has its own QP. Before an RDMA connection is established, a TCP connection is required to exchange information such as the GID and QPN. After the RDMA connection is established, the TCP connection stays in the ESTABLISHED state but is no longer used for data transmission. Once the TCP connection is closed, the corresponding RDMA connection is set to an error state as well.

All memory used for data transmission over RDMA must be registered, and registration is very time-consuming. A memory pool is therefore generally used to avoid frequent registration. brpc uses IOBuf for data transmission; to achieve full zero copy while staying compatible with IOBuf, all memory used by IOBuf is taken over by the RDMA memory pool. Since the IOBuf memory pool is not controlled by the user directly, the total memory consumed by IOBuf should be watched carefully. It is recommended that the application register enough memory at one time, according to its requirements, to maximize performance.

# Parameters

Configurable parameters:
* rdma_trace_verbose: print RDMA connection information in the log, default is false
* rdma_recv_zerocopy: enable zero copy on the receive side, default is true
* rdma_zerocopy_min_size: the minimum message size for receive zero copy (in bytes), default is 512
* rdma_recv_block_type: the block type used for receiving, can be default(8KB)/large(64KB)/huge(2MB), default is default
* rdma_prepared_qp_size: the size of the QPs created at the beginning of the application, default is 128
* rdma_prepared_qp_cnt: the number of QPs created at the beginning of the application, default is 1024
* rdma_max_sge: the max length of the SG list, default is 0, which means the max length allowed by the device
* rdma_sq_size: the size of the SQ, default is 128
* rdma_rq_size: the size of the RQ, default is 128
* rdma_cqe_poll_once: the number of CQEs polled from the CQ at once, default is 32
* rdma_gid_index: the index in the local GID table to use, default is -1, which means the maximum available GID index
* rdma_port: the port number of the IB device to use, default is 1
* rdma_device: the IB device name, default is empty, which means the first active device
* rdma_memory_pool_initial_size_mb: the initial region size of the RDMA memory pool (in MB), default is 1024
* rdma_memory_pool_increase_size_mb: the size by which the RDMA memory pool grows at each step (in MB), default is 1024
* rdma_memory_pool_max_regions: the max number of regions in the RDMA memory pool, default is 16
* rdma_memory_pool_buckets: the number of buckets used to avoid mutex contention in the RDMA memory pool, default is 4
* rdma_memory_pool_tls_cache_num: the number of thread-local cached blocks in the RDMA memory pool, default is 128
150 changes: 150 additions & 0 deletions example/rdma_performance/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

cmake_minimum_required(VERSION 2.8.10)
project(rdma_performance C CXX)

# LINK_SO=ON looks up the brpc library by its plain name only; the default
# (OFF) tries the static archive libbrpc.a first.
option(LINK_SO "Whether examples are linked dynamically" OFF)

# Search upwards from this example for a brpc build tree: take the first
# directory matching ".../output/include" and keep its parent ("output").
execute_process(
    COMMAND bash -c "find ${PROJECT_SOURCE_DIR}/../.. -type d -regex \".*output/include$\" | head -n1 | xargs dirname | tr -d '\n'"
    OUTPUT_VARIABLE OUTPUT_PATH
)

# Look in the local brpc output first, but keep any prefix path supplied by
# the user (e.g. -DCMAKE_PREFIX_PATH=...) so third-party dependencies
# installed in custom locations are still found.
set(CMAKE_PREFIX_PATH ${OUTPUT_PATH} ${CMAKE_PREFIX_PATH})

# Pull in the stock find modules for the threading library and protobuf.
include(FindThreads)
include(FindProtobuf)
# Generate C++ code from test.proto; the generated source and header file
# lists are returned in PROTO_SRC and PROTO_HEADER.
protobuf_generate_cpp(PROTO_SRC PROTO_HEADER test.proto)
# Add the binary dir to the include path so PROTO_HEADER can be included.
include_directories(${CMAKE_CURRENT_BINARY_DIR})

# Best-effort lookup of the thrift libraries (THRIFT_LIB, THRIFTNB_LIB).
# A library that cannot be found is replaced by an empty string so it simply
# drops out of the link line; if brpc was built with the thrift protocol
# enabled, the missing library then shows up as a link error.
foreach(thrift_name thrift thriftnb)
    string(TOUPPER "${thrift_name}_LIB" thrift_var)
    find_library(${thrift_var} NAMES ${thrift_name})
    if(NOT ${thrift_var})
        set(${thrift_var} "")
    endif()
endforeach()

# Locate brpc itself. With LINK_SO only the plain library name is searched;
# otherwise the static archive is tried first.
if(LINK_SO)
    set(brpc_lib_names brpc)
else()
    set(brpc_lib_names libbrpc.a brpc)
endif()
find_library(BRPC_LIB NAMES ${brpc_lib_names})
find_path(BRPC_INCLUDE_PATH NAMES brpc/server.h)

# Both the headers and the library are mandatory.
if(NOT (BRPC_INCLUDE_PATH AND BRPC_LIB))
    message(FATAL_ERROR "Fail to find brpc")
endif()
include_directories(${BRPC_INCLUDE_PATH})

# gflags headers and library are mandatory.
find_path(GFLAGS_INCLUDE_PATH gflags/gflags.h)
find_library(GFLAGS_LIBRARY NAMES gflags libgflags)
if((NOT GFLAGS_INCLUDE_PATH) OR (NOT GFLAGS_LIBRARY))
    message(FATAL_ERROR "Fail to find gflags")
endif()
include_directories(${GFLAGS_INCLUDE_PATH})

# Detect the namespace gflags was built with by reading the first
# "namespace <name> {" line of gflags_declare.h.
execute_process(
    COMMAND bash -c "grep \"namespace [_A-Za-z0-9]\\+ {\" ${GFLAGS_INCLUDE_PATH}/gflags/gflags_declare.h | head -1 | awk '{print $2}' | tr -d '\n'"
    OUTPUT_VARIABLE GFLAGS_NS
)
# If the header spells the namespace through the GFLAGS_NAMESPACE macro,
# resolve the macro's value instead. The expansion is quoted so that an empty
# detection result does not turn into an if() syntax error.
if("${GFLAGS_NS}" STREQUAL "GFLAGS_NAMESPACE")
    execute_process(
        COMMAND bash -c "grep \"#define GFLAGS_NAMESPACE [_A-Za-z0-9]\\+\" ${GFLAGS_INCLUDE_PATH}/gflags/gflags_declare.h | head -1 | awk '{print $3}' | tr -d '\n'"
        OUTPUT_VARIABLE GFLAGS_NS
    )
endif()
# An empty namespace would be passed on as "-DGFLAGS_NS=" and only surface as
# a confusing compile error, so stop here with a clear message.
if(NOT GFLAGS_NS)
    message(FATAL_ERROR "Fail to detect gflags namespace from ${GFLAGS_INCLUDE_PATH}/gflags/gflags_declare.h")
endif()
# On Darwin, probe for clock_gettime and define NO_CLOCK_GETTIME_IN_MAC when
# the function is not available.
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
    include(CheckFunctionExists)
    check_function_exists(clock_gettime HAVE_CLOCK_GETTIME)
    if(NOT HAVE_CLOCK_GETTIME)
        set(DEFINE_CLOCK_GETTIME "-DNO_CLOCK_GETTIME_IN_MAC")
    endif()
endif()

# Preprocessor definitions; this example always builds with BRPC_WITH_RDMA=1.
set(CMAKE_CPP_FLAGS "${DEFINE_CLOCK_GETTIME} -DGFLAGS_NS=${GFLAGS_NS} -DBRPC_WITH_RDMA=1")

# Compiler flags, assembled in groups: optimisation, warnings, code generation.
set(CMAKE_CXX_FLAGS "${CMAKE_CPP_FLAGS} -DNDEBUG -O2 -D__const__= -pipe")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall -Wno-unused-parameter")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -fno-omit-frame-pointer")

# Request C++11. CMake 3.1.3 and newer use the CMAKE_CXX_STANDARD variables;
# older versions get -std=c++11 appended by hand for GNU and Clang compilers.
if(NOT CMAKE_VERSION VERSION_LESS "3.1.3")
    set(CMAKE_CXX_STANDARD 11)
    set(CMAKE_CXX_STANDARD_REQUIRED ON)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()

# leveldb: both the library and its headers are mandatory.
find_library(LEVELDB_LIB NAMES leveldb)
find_path(LEVELDB_INCLUDE_PATH NAMES leveldb/db.h)
if(NOT (LEVELDB_INCLUDE_PATH AND LEVELDB_LIB))
    message(FATAL_ERROR "Fail to find leveldb")
endif()
include_directories(${LEVELDB_INCLUDE_PATH})

# On Darwin, point FindOpenSSL at the Homebrew-installed OpenSSL.
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
    set(OPENSSL_ROOT_DIR "/usr/local/opt/openssl")
endif()

# Locate OpenSSL and expose its headers.
find_package(OpenSSL)
include_directories(${OPENSSL_INCLUDE_DIR})

# The RDMA example needs the verbs header and the ibverbs library.
find_path(RDMA_INCLUDE_PATH NAMES infiniband/verbs.h)
find_library(RDMA_LIB NAMES ibverbs)
if((NOT RDMA_INCLUDE_PATH) OR (NOT RDMA_LIB))
    message(FATAL_ERROR "Fail to find ibverbs")
endif()
# Make the located verbs header directory visible to the compiler.
include_directories(${RDMA_INCLUDE_PATH})

# Libraries linked after brpc. ibverbs is listed explicitly: an RDMA-enabled
# brpc references ibv_* symbols, so a static link against libbrpc.a would
# otherwise fail with unresolved symbols.
set(DYNAMIC_LIB
    ${CMAKE_THREAD_LIBS_INIT}
    ${GFLAGS_LIBRARY}
    ${PROTOBUF_LIBRARIES}
    ${LEVELDB_LIB}
    ${OPENSSL_CRYPTO_LIBRARY}
    ${OPENSSL_SSL_LIBRARY}
    ${THRIFT_LIB}
    ${THRIFTNB_LIB}
    ${RDMA_LIB}
    dl
)

# Extra link items on Darwin: pthread, the system frameworks, and "-Wl,-U"
# entries for the MallocExtension/Profiler symbols.
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
    list(APPEND DYNAMIC_LIB
        pthread
        "-framework CoreFoundation"
        "-framework CoreGraphics"
        "-framework CoreData"
        "-framework CoreText"
        "-framework Security"
        "-framework Foundation"
        "-Wl,-U,_MallocExtension_ReleaseFreeMemory"
        "-Wl,-U,_ProfilerStart"
        "-Wl,-U,_ProfilerStop"
    )
endif()

# Build the two example programs. Each one compiles its own <name>.cpp
# together with the generated protobuf code and links brpc followed by the
# dependency list.
foreach(example_target client server)
    add_executable(${example_target} ${example_target}.cpp ${PROTO_SRC} ${PROTO_HEADER})
    target_link_libraries(${example_target} ${BRPC_LIB} ${DYNAMIC_LIB})
endforeach()
Loading

0 comments on commit 7d38c8f

Please sign in to comment.