Skip to content

Commit b1c22ed

Browse files
authored
NIXLBench: add support for specifying GDS batch and pool sizes (#394)
* add support for specifying gds batch and pool sizes * update readme
1 parent 4de5ee3 commit b1c22ed

File tree

4 files changed

+21
-5
lines changed

4 files changed

+21
-5
lines changed

benchmark/kvbench/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ A utility for generating NIXL Bench commands that test KVCache transfer across v
3333

3434
### Docker
3535
```bash
36-
git clone https://github.com/cheese-head/nixl.git
36+
git clone https://github.com/ai-dynamo/nixl.git
3737
export NIXL_SRC=/path/to/nixl/
3838
cd nixl/benchmark/nixlbench/contrib
3939
./build.sh --nixl $NIXL_SRC
4040
```
4141

4242
### Python
4343
```bash
44-
git clone https://github.com/cheese-head/nixl.git
44+
git clone https://github.com/ai-dynamo/nixl.git
4545
cd nixl/benchmark/kvbench
4646
python3 -m venv venv
4747
source venv/bin/activate

benchmark/nixlbench/src/utils/utils.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ DEFINE_int32(num_target_dev, 1, "Number of device in target process");
6666
DEFINE_bool(enable_pt, false, "Enable Progress Thread (only used with nixl worker)");
6767
// GDS options - only used when backend is GDS
6868
DEFINE_string(gds_filepath, "", "File path for GDS operations (only used with GDS backend)");
69+
DEFINE_int32(gds_batch_pool_size, 32, "Batch pool size for GDS operations (default: 32, only used with GDS backend)");
70+
DEFINE_int32(gds_batch_limit, 128, "Batch limit for GDS operations (default: 128, only used with GDS backend)");
6971

7072
// TODO: We should take a rank-wise device list as input to extend support
7173
// <rank>:<device_list>, ...
@@ -102,7 +104,8 @@ bool xferBenchConfig::enable_pt = false;
102104
std::string xferBenchConfig::device_list = "";
103105
std::string xferBenchConfig::etcd_endpoints = "";
104106
std::string xferBenchConfig::gds_filepath = "";
105-
107+
int xferBenchConfig::gds_batch_pool_size = 0;
108+
int xferBenchConfig::gds_batch_limit = 0;
106109
std::vector<std::string> devices = { };
107110
int xferBenchConfig::num_files = 0;
108111
std::string xferBenchConfig::posix_api_type = "";
@@ -122,6 +125,8 @@ int xferBenchConfig::loadFromFlags() {
122125
// Load GDS-specific configurations if backend is GDS
123126
if (backend == XFERBENCH_BACKEND_GDS) {
124127
gds_filepath = FLAGS_gds_filepath;
128+
gds_batch_pool_size = FLAGS_gds_batch_pool_size;
129+
gds_batch_limit = FLAGS_gds_batch_limit;
125130
num_files = FLAGS_num_files;
126131
storage_enable_direct = FLAGS_storage_enable_direct;
127132
}
@@ -254,6 +259,10 @@ void xferBenchConfig::printConfig() {
254259
if (backend == XFERBENCH_BACKEND_GDS) {
255260
std::cout << std::left << std::setw(60) << "GDS filepath (--gds_filepath=path)" << ": "
256261
<< gds_filepath << std::endl;
262+
std::cout << std::left << std::setw(60) << "GDS batch pool size (--gds_batch_pool_size=N)" << ": "
263+
<< gds_batch_pool_size << std::endl;
264+
std::cout << std::left << std::setw(60) << "GDS batch limit (--gds_batch_limit=N)" << ": "
265+
<< gds_batch_limit << std::endl;
257266
std::cout << std::left << std::setw(60) << "GDS enable direct (--gds_enable_direct=[0,1])" << ": "
258267
<< storage_enable_direct << std::endl;
259268
std::cout << std::left << std::setw(60) << "Number of files (--num_files=N)" << ": "

benchmark/nixlbench/src/utils/utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ class xferBenchConfig {
128128
static std::string posix_api_type;
129129
static std::string posix_filepath;
130130
static bool storage_enable_direct;
131+
static int gds_batch_pool_size;
132+
static int gds_batch_limit;
131133

132134
static int loadFromFlags();
133135
static void printConfig();

benchmark/nixlbench/src/worker/nixl/nixl_worker.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ xferBenchNixlWorker::xferBenchNixlWorker(int *argc, char ***argv, std::vector<st
140140
} else if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_GDS)) {
141141
// Pass the configured batch pool size and batch limit to the GDS backend
142142
std::cout << "GDS backend" << std::endl;
143+
backend_params["batch_pool_size"] = std::to_string(xferBenchConfig::gds_batch_pool_size);
144+
backend_params["batch_limit"] = std::to_string(xferBenchConfig::gds_batch_limit);
145+
std::cout << "GDS batch pool size: " << xferBenchConfig::gds_batch_pool_size << std::endl;
146+
std::cout << "GDS batch limit: " << xferBenchConfig::gds_batch_limit << std::endl;
143147
} else if (0 == xferBenchConfig::backend.compare(XFERBENCH_BACKEND_POSIX)) {
144148
// Set API type parameter for POSIX backend
145149
if (xferBenchConfig::posix_api_type == XFERBENCH_POSIX_API_AIO) {
@@ -404,9 +408,10 @@ std::vector<std::vector<xferBenchIOV>> xferBenchNixlWorker::allocateMemory(int n
404408

405409
if (XFERBENCH_BACKEND_GDS == xferBenchConfig::backend ||
406410
XFERBENCH_BACKEND_POSIX == xferBenchConfig::backend) {
407-
remote_fds = createFileFds(getName(), true);
411+
bool is_gds = XFERBENCH_BACKEND_GDS == xferBenchConfig::backend;
412+
remote_fds = createFileFds(getName(), is_gds);
408413
if (remote_fds.empty()) {
409-
std::cerr << "Failed to create GDS file" << std::endl;
414+
std::cerr << "Failed to create " << ((is_gds) ? "GDS" : "POSIX") << " file" << std::endl;
410415
exit(EXIT_FAILURE);
411416
}
412417
for (int list_idx = 0; list_idx < num_lists; list_idx++) {

0 commit comments

Comments
 (0)