Skip to content

Commit c15e64e

Browse files
authored
[Libfabric] Add same-agent transfer optimization with memcpy fallback (#809)
- Implement direct memcpy for transfers within the same agent to avoid unnecessary network operations
- Add debug logging for transfer path selection and agent comparison
- Improve debug messaging for same-agent transfers

---------

Signed-off-by: Arun Karthik <akkart@amazon.com>
1 parent 3978a64 commit c15e64e

File tree

2 files changed

+56
-3
lines changed

2 files changed

+56
-3
lines changed

src/plugins/libfabric/libfabric_backend.cpp

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,7 +1067,40 @@ nixlLibfabricEngine::postXfer(const nixl_xfer_op_t &operation,
10671067
NIXL_DEBUG << "Processing descriptor " << desc_idx << " GPU " << gpu_id
10681068
<< " addr: " << transfer_addr << " size: " << transfer_size;
10691069

1070-
// Prepare and submit transfer
1070+
NIXL_DEBUG << "DEBUG: remote_agent='" << remote_agent << "' localAgent='" << localAgent
1071+
<< "'";
1072+
1073+
// Check for same-agent (local) transfer - handle with direct memcpy
1074+
if (remote_agent == localAgent) {
1075+
NIXL_DEBUG << "Same-agent transfer detected from localAgent= " << localAgent
1076+
<< "to remote_agent " << remote_agent << "for descriptor " << desc_idx
1077+
<< ", using memcpy fallback for " << transfer_size << " bytes";
1078+
1079+
// For same-agent transfers, we need to copy directly between the descriptor addresses
1080+
// The remote[desc_idx].addr should be the target address for the transfer
1081+
void *remote_addr = reinterpret_cast<void *>(remote[desc_idx].addr);
1082+
1083+
NIXL_DEBUG << "About to perform memcpy: local_addr=" << transfer_addr
1084+
<< " remote_addr=" << remote_addr << " size=" << transfer_size;
1085+
1086+
if (op_type == nixlLibfabricReq::WRITE) {
1087+
// Write: copy from local_addr to remote_addr
1088+
std::memcpy(remote_addr, transfer_addr, transfer_size);
1089+
NIXL_DEBUG << "Same-agent memcpy write completed: " << transfer_addr << " -> "
1090+
<< remote_addr << " (" << transfer_size << " bytes)";
1091+
} else {
1092+
// Read: copy from remote_addr to local_addr
1093+
std::memcpy(transfer_addr, remote_addr, transfer_size);
1094+
NIXL_DEBUG << "Same-agent memcpy read completed: " << remote_addr << " -> "
1095+
<< transfer_addr << " (" << transfer_size << " bytes)";
1096+
}
1097+
1098+
NIXL_DEBUG << "Successfully processed same-agent descriptor " << desc_idx
1099+
<< " using memcpy fallback";
1100+
continue; // Skip the rail manager transfer for this descriptor
1101+
}
1102+
1103+
// Prepare and submit transfer for remote agents
10711104
nixl_status_t status = rail_manager.prepareAndSubmitTransfer(
10721105
op_type,
10731106
transfer_addr,
@@ -1098,8 +1131,14 @@ nixlLibfabricEngine::postXfer(const nixl_xfer_op_t &operation,
10981131
<< " requests from " << desc_count << " descriptors" << " with "
10991132
<< binary_notif->xfer_id_count << " total XFER_IDs";
11001133

1101-
// Adjust to actual request count after all submissions complete
1102-
backend_handle->adjust_total_requests(binary_notif->xfer_id_count);
1134+
// For same-agent transfers, we need to set the total to 0 since we bypassed all rail operations
1135+
if (remote_agent == localAgent) {
1136+
backend_handle->adjust_total_requests(0);
1137+
NIXL_DEBUG << "Same-agent transfer: adjusted total requests to 0 (all handled via memcpy)";
1138+
} else {
1139+
// Adjust to actual request count after all submissions complete
1140+
backend_handle->adjust_total_requests(binary_notif->xfer_id_count);
1141+
}
11031142

11041143
// Send notification immediately after successful request submission
11051144
if (opt_args && opt_args->hasNotif) {

src/utils/libfabric/libfabric_rail.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,20 @@ nixlLibfabricRail::nixlLibfabricRail(const std::string &device, uint16_t id)
307307
throw std::runtime_error("fi_ep_bind av failed for rail " + std::to_string(rail_id));
308308
}
309309

310+
// Disable shared memory transfers for EFA provider to fix same-agent transfers
311+
bool optval = false;
312+
ret = fi_setopt(&endpoint->fid,
313+
FI_OPT_ENDPOINT,
314+
FI_OPT_SHARED_MEMORY_PERMITTED,
315+
&optval,
316+
sizeof(optval));
317+
if (ret && ret != -FI_ENOSYS) {
318+
NIXL_WARN << "fi_setopt FI_OPT_SHARED_MEMORY_PERMITTED failed for rail " << rail_id
319+
<< ": " << fi_strerror(-ret) << " - continuing anyway";
320+
} else if (ret == 0) {
321+
NIXL_DEBUG << "Successfully disabled shared memory transfers for rail " << rail_id;
322+
}
323+
310324
// Enable endpoint for this rail
311325
ret = fi_enable(endpoint);
312326
if (ret) {

0 commit comments

Comments
 (0)