@@ -1067,7 +1067,40 @@ nixlLibfabricEngine::postXfer(const nixl_xfer_op_t &operation,
10671067 NIXL_DEBUG << " Processing descriptor " << desc_idx << " GPU " << gpu_id
10681068 << " addr: " << transfer_addr << " size: " << transfer_size;
10691069
1070- // Prepare and submit transfer
1070+ NIXL_DEBUG << " DEBUG: remote_agent='" << remote_agent << " ' localAgent='" << localAgent
1071+ << " '" ;
1072+
1073+ // Check for same-agent (local) transfer - handle with direct memcpy
1074+ if (remote_agent == localAgent) {
1075+ NIXL_DEBUG << " Same-agent transfer detected from localAgent= " << localAgent
1076+ << " to remote_agent " << remote_agent << " for descriptor " << desc_idx
1077+ << " , using memcpy fallback for " << transfer_size << " bytes" ;
1078+
1079+ // Same-agent transfers bypass the rails and copy directly between the two descriptor addresses.
1080+ // remote[desc_idx].addr is the peer-side address: the destination for a WRITE, the source otherwise (READ).
1081+ void *remote_addr = reinterpret_cast <void *>(remote[desc_idx].addr );
1082+
1083+ NIXL_DEBUG << " About to perform memcpy: local_addr=" << transfer_addr
1084+ << " remote_addr=" << remote_addr << " size=" << transfer_size;
1085+
1086+ if (op_type == nixlLibfabricReq::WRITE) {
1087+ // Write: copy from local_addr to remote_addr
1088+ std::memcpy (remote_addr, transfer_addr, transfer_size);
1089+ NIXL_DEBUG << " Same-agent memcpy write completed: " << transfer_addr << " -> "
1090+ << remote_addr << " (" << transfer_size << " bytes)" ;
1091+ } else {
1092+ // Read: copy from remote_addr to local_addr
1093+ std::memcpy (transfer_addr, remote_addr, transfer_size);
1094+ NIXL_DEBUG << " Same-agent memcpy read completed: " << remote_addr << " -> "
1095+ << transfer_addr << " (" << transfer_size << " bytes)" ;
1096+ }
1097+
1098+ NIXL_DEBUG << " Successfully processed same-agent descriptor " << desc_idx
1099+ << " using memcpy fallback" ;
1100+ continue ; // Skip the rail manager transfer for this descriptor
1101+ }
1102+
1103+ // Prepare and submit transfer for remote agents
10711104 nixl_status_t status = rail_manager.prepareAndSubmitTransfer (
10721105 op_type,
10731106 transfer_addr,
@@ -1098,8 +1131,14 @@ nixlLibfabricEngine::postXfer(const nixl_xfer_op_t &operation,
10981131 << " requests from " << desc_count << " descriptors" << " with "
10991132 << binary_notif->xfer_id_count << " total XFER_IDs" ;
11001133
1101- // Adjust to actual request count after all submissions complete
1102- backend_handle->adjust_total_requests (binary_notif->xfer_id_count );
1134+ // For same-agent transfers, we need to set the total to 0 since we bypassed all rail operations
1135+ if (remote_agent == localAgent) {
1136+ backend_handle->adjust_total_requests (0 );
1137+ NIXL_DEBUG << " Same-agent transfer: adjusted total requests to 0 (all handled via memcpy)" ;
1138+ } else {
1139+ // Adjust to actual request count after all submissions complete
1140+ backend_handle->adjust_total_requests (binary_notif->xfer_id_count );
1141+ }
11031142
11041143 // Send notification immediately after successful request submission
11051144 if (opt_args && opt_args->hasNotif ) {
0 commit comments