Skip to content

Commit

Permalink
use PLOG to print errno message.
Browse files: browse the repository at this point in the history
  • Loading branch information
doujiang24 committed Jan 7, 2025
1 parent ecfb9b0 commit bfc0f90
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 97 deletions.
4 changes: 0 additions & 4 deletions mooncake-transfer-engine/include/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
#ifndef ERROR_H
#define ERROR_H

#include <string>

#define ERR_INVALID_ARGUMENT (-1)
#define ERR_TOO_MANY_REQUESTS (-2)
#define ERR_ADDRESS_NOT_REGISTERED (-3)
Expand All @@ -38,6 +36,4 @@
#define ERR_MEMORY (-302)
#define ERR_NOT_IMPLEMENTED (-303)

std::string getErrorString();

#endif // ERROR_H
27 changes: 0 additions & 27 deletions mooncake-transfer-engine/src/error.cpp

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ RdmaContext::RdmaContext(RdmaTransport &engine, const std::string &device_name)
static std::once_flag g_once_flag;
auto fork_init = []() {
int ret = ibv_fork_init();
if (ret)
LOG(ERROR) << "RDMA context setup failed: fork compatibility: "
<< getErrorString();
if (ret) PLOG(ERROR) << "RDMA context setup failed: fork compatibility"
};
std::call_once(g_once_flag, fork_init);
}
Expand Down Expand Up @@ -90,7 +88,7 @@ int RdmaContext::construct(size_t num_cq_list, size_t num_comp_channels,

event_fd_ = epoll_create1(0);
if (event_fd_ < 0) {
PLOG(ERROR) << "Failed to create epoll: " << getErrorString();
PLOG(ERROR) << "Failed to create epoll";
return ERR_CONTEXT;
}

Expand Down Expand Up @@ -149,17 +147,15 @@ int RdmaContext::deconstruct() {
for (auto &entry : memory_region_list_) {
int ret = ibv_dereg_mr(entry);
if (ret) {
LOG(ERROR) << "Failed to unregister memory region: "
<< getErrorString();
PLOG(ERROR) << "Failed to unregister memory region"
}
}
memory_region_list_.clear();

for (size_t i = 0; i < cq_list_.size(); ++i) {
int ret = ibv_destroy_cq(cq_list_[i]);
if (ret) {
PLOG(ERROR) << "Failed to destroy completion queue: "
<< getErrorString();
PLOG(ERROR) << "Failed to destroy completion queue"
}
}
cq_list_.clear();
Expand All @@ -180,14 +176,13 @@ int RdmaContext::deconstruct() {

if (pd_) {
if (ibv_dealloc_pd(pd_))
LOG(ERROR) << "Failed to deallocate protection domain";
PLOG(ERROR) << "Failed to deallocate protection domain";
pd_ = nullptr;
}

if (context_) {
if (ibv_close_device(context_))
LOG(ERROR) << "Failed to close device context: "
<< getErrorString();
PLOG(ERROR) << "Failed to close device context";
context_ = nullptr;
}

Expand Down Expand Up @@ -361,16 +356,16 @@ int RdmaContext::getBestGidIndex(const std::string &device_name,
int is_ipv4, is_ipv4_rival;

if (ibv_query_gid(context, port, gid_index, &temp_gid)) {
LOG(ERROR) << "Failed to query GID " << gid_index << " on "
<< device_name << "/" << port << ": " << getErrorString();
PLOG(ERROR) << "Failed to query GID " << gid_index << " on "
<< device_name << "/" << port;
return -1;
}
is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)temp_gid.raw);

for (i = 1; i < port_attr.gid_tbl_len; i++) {
if (ibv_query_gid(context, port, i, &temp_gid_rival)) {
LOG(ERROR) << "Failed to query GID " << i << " on " << device_name
<< "/" << port << ": " << getErrorString();
PLOG(ERROR) << "Failed to query GID " << i << " on " << device_name
<< "/" << port;
return -1;
}
is_ipv4_rival =
Expand Down Expand Up @@ -406,11 +401,10 @@ int RdmaContext::openRdmaDevice(const std::string &device_name, uint8_t port,
ibv_port_attr attr;
int ret = ibv_query_port(context, port, &attr);
if (ret) {
LOG(ERROR) << "Failed to query port " << port << " on "
<< device_name << ": " << getErrorString();
PLOG(ERROR) << "Failed to query port " << port << " on "
<< device_name;
if (ibv_close_device(context)) {
LOG(ERROR) << "ibv_close_device(" << device_name
<< ") failed: " << getErrorString();
PLOG(ERROR) << "ibv_close_device(" << device_name << ") failed";
}
ibv_free_device_list(devices);
return ERR_CONTEXT;
Expand All @@ -419,8 +413,7 @@ int RdmaContext::openRdmaDevice(const std::string &device_name, uint8_t port,
if (attr.state != IBV_PORT_ACTIVE) {
LOG(WARNING) << "Device " << device_name << " port not active";
if (ibv_close_device(context)) {
LOG(ERROR) << "ibv_close_device(" << device_name
<< ") failed: " << getErrorString();
PLOG(ERROR) << "ibv_close_device(" << device_name << ") failed";
}
ibv_free_device_list(devices);
return ERR_CONTEXT;
Expand All @@ -429,11 +422,9 @@ int RdmaContext::openRdmaDevice(const std::string &device_name, uint8_t port,
ibv_device_attr device_attr;
ret = ibv_query_device(context, &device_attr);
if (ret) {
LOG(WARNING) << "Failed to query attributes on " << device_name
<< ": " << getErrorString();
PLOG(WARNING) << "Failed to query attributes on " << device_name;
if (ibv_close_device(context)) {
LOG(ERROR) << "ibv_close_device(" << device_name
<< ") failed: " << getErrorString();
PLOG(ERROR) << "ibv_close_device(" << device_name << ") failed";
}
ibv_free_device_list(devices);
return ERR_CONTEXT;
Expand All @@ -442,11 +433,10 @@ int RdmaContext::openRdmaDevice(const std::string &device_name, uint8_t port,
ibv_port_attr port_attr;
ret = ibv_query_port(context, port, &port_attr);
if (ret) {
LOG(WARNING) << "Failed to query port attributes on " << device_name
<< "/" << port << ": " << getErrorString();
PLOG(WARNING) << "Failed to query port attributes on "
<< device_name << "/" << port;
if (ibv_close_device(context)) {
LOG(ERROR) << "ibv_close_device(" << device_name
<< ") failed: " << getErrorString();
PLOG(ERROR) << "ibv_close_device(" << device_name << ") failed";
}
ibv_free_device_list(devices);
return ERR_CONTEXT;
Expand All @@ -464,12 +454,10 @@ int RdmaContext::openRdmaDevice(const std::string &device_name, uint8_t port,

ret = ibv_query_gid(context, port, gid_index, &gid_);
if (ret) {
LOG(ERROR) << "Failed to query GID " << gid_index << " on "
<< device_name << "/" << port << ": "
<< getErrorString();
PLOG(ERROR) << "Failed to query GID " << gid_index << " on "
<< device_name << "/" << port;
if (ibv_close_device(context)) {
LOG(ERROR) << "ibv_close_device(" << device_name
<< ") failed: " << getErrorString();
PLOG(ERROR) << "ibv_close_device(" << device_name << ") failed";
}
ibv_free_device_list(devices);
return ERR_CONTEXT;
Expand All @@ -480,8 +468,7 @@ int RdmaContext::openRdmaDevice(const std::string &device_name, uint8_t port,
LOG(WARNING) << "GID is NULL, please check your GID index by "
"specifying MC_GID_INDEX";
if (ibv_close_device(context)) {
LOG(ERROR) << "ibv_close_device(" << device_name
<< ") failed: " << getErrorString();
PLOG(ERROR) << "ibv_close_device(" << device_name << ") failed";
}
ibv_free_device_list(devices);
return ERR_CONTEXT;
Expand Down Expand Up @@ -510,21 +497,17 @@ int RdmaContext::joinNonblockingPollList(int event_fd, int data_fd) {

int flags = fcntl(data_fd, F_GETFL, 0);
if (flags == -1) {
PLOG(ERROR) << "Failed to get file descriptor flags: "
<< getErrorString();
return ERR_CONTEXT;
PLOG(ERROR) << "Failed to get file descriptor flags" return ERR_CONTEXT;
}
if (fcntl(data_fd, F_SETFL, flags | O_NONBLOCK) == -1) {
PLOG(ERROR) << "Failed to set file descriptor nonblocking: "
<< getErrorString();
PLOG(ERROR) << "Failed to set file descriptor nonblocking";
return ERR_CONTEXT;
}

event.events = EPOLLIN | EPOLLET;
event.data.fd = data_fd;
if (epoll_ctl(event_fd, EPOLL_CTL_ADD, event.data.fd, &event)) {
PLOG(ERROR) << "Failed to register file descriptor to epoll: "
<< getErrorString();
PLOG(ERROR) << "Failed to register file descriptor to epoll";
return ERR_CONTEXT;
}

Expand All @@ -534,8 +517,8 @@ int RdmaContext::joinNonblockingPollList(int event_fd, int data_fd) {
int RdmaContext::poll(int num_entries, ibv_wc *wc, int cq_index) {
int nr_poll = ibv_poll_cq(cq_list_[cq_index], num_entries, wc);
if (nr_poll < 0) {
PLOG(ERROR) << "Failed to poll CQ " << cq_index << " of device "
<< device_name_;
LOG(ERROR) << "Failed to poll CQ " << cq_index << " of device "
<< device_name_;
return ERR_CONTEXT;
}
return nr_poll;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ int RdmaEndPoint::deconstruct() {
<< "Outstanding work requests found, CQ will not be generated";

if (ibv_destroy_qp(qp_list_[i])) {
LOG(ERROR) << "Failed to destroy QP: " << getErrorString();
PLOG(ERROR) << "Failed to destroy QP";
return ERR_ENDPOINT;
}
}
Expand Down Expand Up @@ -194,8 +194,7 @@ void RdmaEndPoint::disconnectUnlocked() {
attr.qp_state = IBV_QPS_RESET;
for (size_t i = 0; i < qp_list_.size(); ++i) {
int ret = ibv_modify_qp(qp_list_[i], &attr, IBV_QP_STATE);
if (ret)
LOG(ERROR) << "Failed to modity QP to RESET: " << getErrorString();
if (ret) PLOG(ERROR) << "Failed to modity QP to RESET";
}
peer_nic_path_.clear();
for (size_t i = 0; i < qp_list_.size(); ++i) wr_depth_list_[i] = 0;
Expand Down Expand Up @@ -261,7 +260,7 @@ int RdmaEndPoint::submitPostSend(
__sync_fetch_and_add(&wr_depth_list_[qp_index], wr_count);
int rc = ibv_post_send(qp_list_[qp_index], wr_list, &bad_wr);
if (rc) {
LOG(ERROR) << "ibv_post_send: " << getErrorString();
PLOG(ERROR) << "Failed to ibv_post_send";
while (bad_wr) {
int i = bad_wr - wr_list;
failed_slice_list.push_back(slice_list[i]);
Expand Down Expand Up @@ -316,10 +315,9 @@ int RdmaEndPoint::doSetupConnection(int qp_index, const std::string &peer_gid,
attr.qp_state = IBV_QPS_RESET;
int ret = ibv_modify_qp(qp, &attr, IBV_QP_STATE);
if (ret) {
std::string message = "Failed to modity QP to RESET: ";
message += getErrorString();
LOG(ERROR) << "[Handshake] " << message;
if (reply_msg) *reply_msg = message;
std::string message = "Failed to modity QP to RESET";
PLOG(ERROR) << "[Handshake] " << message;
if (reply_msg) *reply_msg = message + ": " + strerror(errno);
return ERR_ENDPOINT;
}

Expand All @@ -335,10 +333,9 @@ int RdmaEndPoint::doSetupConnection(int qp_index, const std::string &peer_gid,
IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS);
if (ret) {
std::string message =
"Failed to modity QP to INIT, check local context port num: ";
message += getErrorString();
LOG(ERROR) << "[Handshake] " << message;
if (reply_msg) *reply_msg = message;
"Failed to modity QP to INIT, check local context port num";
PLOG(ERROR) << "[Handshake] " << message;
if (reply_msg) *reply_msg = message + ": " + strerror(errno);
return ERR_ENDPOINT;
}

Expand Down Expand Up @@ -376,11 +373,9 @@ int RdmaEndPoint::doSetupConnection(int qp_index, const std::string &peer_gid,
IBV_QP_DEST_QPN | IBV_QP_RQ_PSN);
if (ret) {
std::string message =
"Failed to modity QP to RTR, check mtu, gid, peer lid, peer qp "
"num: ";
message += getErrorString();
LOG(ERROR) << "[Handshake] " << message;
if (reply_msg) *reply_msg = message;
"Failed to modity QP to RTR, check mtu, gid, peer lid, peer qp num";
PLOG(ERROR) << "[Handshake] " << message;
if (reply_msg) *reply_msg = message + ": " + strerror(errno);
return ERR_ENDPOINT;
}

Expand All @@ -397,10 +392,9 @@ int RdmaEndPoint::doSetupConnection(int qp_index, const std::string &peer_gid,
IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
IBV_QP_MAX_QP_RD_ATOMIC);
if (ret) {
std::string message = "Failed to modity QP to RTS: ";
message += getErrorString();
LOG(ERROR) << "[Handshake] " << message;
if (reply_msg) *reply_msg = message;
std::string message = "Failed to modity QP to RTS";
PLOG(ERROR) << "[Handshake] " << message;
if (reply_msg) *reply_msg = message + ": " + strerror(errno);
return ERR_ENDPOINT;
}

Expand Down

0 comments on commit bfc0f90

Please sign in to comment.