Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions src/api/gpu/ucx/nixl_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ struct nixlGpuXferReqParams {
*/
__device__ inline nixl_status_t
nixlGpuConvertUcsStatus(ucs_status_t status) {
    // Only statuses that UCS_STATUS_IS_ERR classifies as errors are mapped to
    // NIXL_ERR_BACKEND. Every other status (not just UCS_OK) is reported as
    // success, so a non-error, non-OK status is no longer turned into a failure.
    if (UCS_STATUS_IS_ERR(status)) {
        // Log the raw UCX code so the failure can be diagnosed; the NIXL
        // status returned to the caller does not carry it.
        printf("UCX returned error: %d\n", static_cast<int>(status));
        return NIXL_ERR_BACKEND;
    }
    return NIXL_SUCCESS;
}

/**
Expand All @@ -76,7 +80,8 @@ nixlGpuConvertUcsStatus(ucs_status_t status) {
* @param size [in] Size in bytes of the memory to be transferred.
* @param channel_id [in] Channel ID to use for the transfer.
* @param is_no_delay [in] Whether to use no-delay mode.
* @param xfer_status [out] Status of the transfer. If null, the status is not reported;
* if not null, use @ref nixlGpuGetXferStatus to check for completion.
*
* @return nixl_status_t Error code if call was not successful
*/
Expand Down Expand Up @@ -107,9 +112,10 @@ nixlGpuPostSingleWriteXferReq(nixlGpuXferReqH req_hndl,
* @param signal_offset [in] Offset of the signal to be sent.
* @param channel_id [in] Channel ID to use for the transfer.
* @param is_no_delay [in] Whether to use no-delay mode.
* @param xfer_status [out] Status of the transfer. If null, the status is not reported;
* if not null, use @ref nixlGpuGetXferStatus to check for completion.
*
* @return nixl_status_t Error code if call was not successful
*/
template<nixl_gpu_level_t level = nixl_gpu_level_t::THREAD>
__device__ nixl_status_t
Expand Down Expand Up @@ -143,9 +149,10 @@ nixlGpuPostSignalXferReq(nixlGpuXferReqH req_hndl,
* @param signal_offset [in] Offset of the signal to be sent.
* @param channel_id [in] Channel ID to use for the transfer.
* @param is_no_delay [in] Whether to use no-delay mode.
* @param xfer_status [out] Status of the transfer. If null, the status is not reported;
* if not null, use @ref nixlGpuGetXferStatus to check for completion.
*
* @return nixl_status_t Error code if call was not successful
*/
template<nixl_gpu_level_t level = nixl_gpu_level_t::THREAD>
__device__ nixl_status_t
Expand Down Expand Up @@ -188,9 +195,10 @@ nixlGpuPostPartialWriteXferReq(nixlGpuXferReqH req_hndl,
* @param signal_offset [in] Offset of the signal to be sent.
* @param channel_id [in] Channel ID to use for the transfer.
* @param is_no_delay [in] Whether to use no-delay mode.
* @param xfer_status [out] Status of the transfer. If null, the status is not reported;
* if not null, use @ref nixlGpuGetXferStatus to check for completion.
*
* @return nixl_status_t Error code if call was not successful
*/
template<nixl_gpu_level_t level = nixl_gpu_level_t::THREAD>
__device__ nixl_status_t
Expand Down Expand Up @@ -218,9 +226,9 @@ nixlGpuPostWriteXferReq(nixlGpuXferReqH req_hndl,
*
* @param xfer_status [in] Status of the transfer.
*
* @return NIXL_SUCCESS The request has completed, no more operations are in progress.
* @return NIXL_IN_PROG One or more operations in the request have not completed.
* @return NIXL_ERR_BACKEND The call was not successful: an error occurred in the UCX backend.
*/
template<nixl_gpu_level_t level = nixl_gpu_level_t::THREAD>
__device__ nixl_status_t
Expand Down