Skip to content

Commit

Permalink
profiling IB transport traffic
Browse files Browse the repository at this point in the history
Summary:
Add cvar NCCL_CTRAN_IB_TRAFFIC_PROFILNG to optionally profile lifetime traffic for each IB device and each QP.

NCCL_CTRAN_IB_TRAFFIC_PROFILNG is disabled by default; set it to true to enable profiling.

Differential Revision: D51244008
  • Loading branch information
minsii authored and facebook-github-bot committed Nov 13, 2023
1 parent 83d24a5 commit 7eac3ec
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 1 deletion.
7 changes: 7 additions & 0 deletions README.cvars
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,13 @@ Description:
Type: stringlist
Default: None

NCCL_CTRAN_IB_TRAFFIC_PROFILNG
Description:
Enable IB transport traffic profiling.
Disabled by default.
Type: bool
Default: False

NCCL_CTRAN_IB_MAX_QPS
Description:
Maximum number of QPs to enable, so data can be split across
Expand Down
44 changes: 43 additions & 1 deletion src/ctran/backends/ib/ctranIb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@
List of IB HCAs available for NCCL to use.
(this needs to be renamed to NCCL_IB_HCA_LIST eventually)
- name : NCCL_CTRAN_IB_TRAFFIC_PROFILNG
type : bool
default : false
description : |-
Enable IB transport traffic profiling.
Disabled by default.
=== END_NCCL_CVAR_INFO_BLOCK ===
*/

Expand Down Expand Up @@ -107,6 +114,41 @@ ctranIbSingleton::~ctranIbSingleton() {
for (auto context : this->contexts) {
NCCLCHECKIGNORE(wrap_ibv_close_device(context));
}

if (!NCCL_CTRAN_IB_TRAFFIC_PROFILNG)
return;

this->trafficRecordMutex.lock();
for (auto& it : this->trafficPerDevice) {
INFO(NCCL_INIT, "CTRAN-IB: [traffic profiling] device %s total traffic: %ld bytes", it.first.c_str(), it.second);
}
for (auto& it : this->trafficPerQP) {
INFO(NCCL_INIT, "CTRAN-IB: [traffic profiling] qp %d total traffic: %ld bytes", it.first, it.second);
}
}

// Adds nbytes to the running traffic total kept for the IB device that owns
// ctx, keyed by the device name (ctx->device->name). Does nothing unless the
// NCCL_CTRAN_IB_TRAFFIC_PROFILNG cvar is enabled.
void ctranIbSingleton::recordCtxTraffic(
    struct ibv_context* ctx,
    size_t nbytes) {
  if (!NCCL_CTRAN_IB_TRAFFIC_PROFILNG) {
    return;
  }

  std::lock_guard<std::mutex> lock(this->trafficRecordMutex);
  const std::string deviceKey(ctx->device->name);

  // operator[] value-initializes a missing entry to 0 before the addition,
  // so a device seen for the first time starts counting from zero.
  size_t& total = this->trafficPerDevice[deviceKey];
  total += nbytes;
}

// Adds nbytes to the running traffic total kept for the given queue pair,
// keyed by its QP number (qp->qp_num). Does nothing unless the
// NCCL_CTRAN_IB_TRAFFIC_PROFILNG cvar is enabled.
void ctranIbSingleton::recordQpTraffic(struct ibv_qp* qp, size_t nbytes) {
  if (!NCCL_CTRAN_IB_TRAFFIC_PROFILNG) {
    return;
  }

  std::lock_guard<std::mutex> lock(this->trafficRecordMutex);

  // operator[] value-initializes a missing entry to 0 before the addition,
  // so a QP seen for the first time starts counting from zero.
  size_t& total = this->trafficPerQP[qp->qp_num];
  total += nbytes;
}

ctranIb::ctranIb(ncclComm *comm) {
Expand All @@ -120,7 +162,7 @@ ctranIb::ctranIb(ncclComm *comm) {
this->pimpl->context = s.contexts[comm->cudaDev];
this->pimpl->pd = s.pds[comm->cudaDev];
this->pimpl->port = s.ports[comm->cudaDev];
INFO(NCCL_INIT, "CTRAN-IB: using device %s, port %d", s.devNames[comm->cudaDev].c_str(), this->pimpl->port);
INFO(NCCL_INIT, "CTRAN-IB: using device %s, port %d commHash %lu", s.devNames[comm->cudaDev].c_str(), this->pimpl->port, comm->commHash);

struct ibv_device_attr devAttr;
NCCLCHECKIGNORE(wrap_ibv_query_device(this->pimpl->context, &devAttr));
Expand Down
6 changes: 6 additions & 0 deletions src/ctran/backends/ib/ctranIbImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <vector>
#include <thread>
#include <mutex>
#include <stdint.h>
#include <unordered_map>
#include "ibvwrap.h"
#include "bootstrap.h"
Expand Down Expand Up @@ -42,8 +43,13 @@ class ctranIbSingleton {
std::vector<struct ibv_context *> contexts;
std::vector<struct ibv_pd *> pds;
std::vector<std::string> devNames;
void recordCtxTraffic(struct ibv_context *ctx, size_t nbytes);
void recordQpTraffic(struct ibv_qp* qp, size_t nbytes);

private:
std::unordered_map<std::string, size_t> trafficPerDevice;
std::unordered_map<uint32_t, size_t> trafficPerQP;
std::mutex trafficRecordMutex;
ctranIbSingleton();
~ctranIbSingleton();
};
Expand Down
5 changes: 5 additions & 0 deletions src/ctran/backends/ib/ctranIbVc.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

#include <iostream>
#include <mutex>
#include <unordered_map>
#include <vector>
#include <thread>
#include <unistd.h>
Expand Down Expand Up @@ -343,6 +345,8 @@ ncclResult_t ctranIb::impl::vc::postPutMsg(const void *sbuf, void *dbuf, std::si
}

uint64_t offset = 0;
ctranIbSingleton& s = ctranIbSingleton::getInstance();
s.recordCtxTraffic(this->context, len_);

for (int i = 0; i < numQps; i++) {
uint64_t len = len_ / numQps;
Expand Down Expand Up @@ -386,6 +390,7 @@ ncclResult_t ctranIb::impl::vc::postPutMsg(const void *sbuf, void *dbuf, std::si
wr.wr.rdma.remote_addr = reinterpret_cast<uint64_t>(dbuf) + offset;
wr.wr.rdma.rkey = rkey;

s.recordQpTraffic(this->dataQp[i], toSend);
NCCLCHECKGOTO(wrap_ibv_post_send(this->dataQp[i], &wr, &badWr), res, exit);

len -= toSend;
Expand Down
2 changes: 2 additions & 0 deletions src/include/nccl_cvars.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ extern enum NCCL_SENDRECV_ALGO NCCL_SENDRECV_ALGO;

extern std::set<std::string> NCCL_IB_HCA;

extern bool NCCL_CTRAN_IB_TRAFFIC_PROFILNG;

extern int NCCL_CTRAN_IB_MAX_QPS;

extern int NCCL_CTRAN_IB_QP_SCALING_THRESHOLD;
Expand Down
5 changes: 5 additions & 0 deletions src/misc/nccl_cvars.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ enum NCCL_SENDRECV_ALGO NCCL_SENDRECV_ALGO;

std::set<std::string> NCCL_IB_HCA;

bool NCCL_CTRAN_IB_TRAFFIC_PROFILNG;

int NCCL_CTRAN_IB_MAX_QPS;

int NCCL_CTRAN_IB_QP_SCALING_THRESHOLD;
Expand Down Expand Up @@ -126,6 +128,7 @@ void ncclCvarInit() {
env.insert("NCCL_DDA_FORCE_P2P_ACCESS");
env.insert("NCCL_SENDRECV_ALGO");
env.insert("NCCL_IB_HCA");
env.insert("NCCL_CTRAN_IB_TRAFFIC_PROFILNG");
env.insert("NCCL_CTRAN_IB_MAX_QPS");
env.insert("NCCL_CTRAN_IB_QP_SCALING_THRESHOLD");
env.insert("NCCL_CTRAN_PROFILING");
Expand Down Expand Up @@ -239,6 +242,8 @@ void ncclCvarInit() {

NCCL_IB_HCA = env2strlist("NCCL_IB_HCA", nullptr);

NCCL_CTRAN_IB_TRAFFIC_PROFILNG = env2bool("NCCL_CTRAN_IB_TRAFFIC_PROFILNG", "False");

NCCL_CTRAN_IB_MAX_QPS = env2int("NCCL_CTRAN_IB_MAX_QPS", "1");

NCCL_CTRAN_IB_QP_SCALING_THRESHOLD = env2int("NCCL_CTRAN_IB_QP_SCALING_THRESHOLD", "1048576");
Expand Down

0 comments on commit 7eac3ec

Please sign in to comment.