@@ -19,8 +19,11 @@
 
 #include <algorithm>
 #include <cstdint>
+#include <cstdlib>
 #include <memory>
 
+#include <cuda.h>
+
 #include "arrow/buffer.h"
 #include "arrow/io/memory.h"
 #include "arrow/status.h"
@@ -32,15 +35,69 @@
 namespace arrow {
 namespace gpu {
 
-CudaBuffer::~CudaBuffer() {
+// ----------------------------------------------------------------------
+// CUDA IPC memory handle
+
+struct CudaIpcMemHandle::CudaIpcMemHandleImpl {
+  explicit CudaIpcMemHandleImpl(const void* handle) {
+    memcpy(&ipc_handle, handle, sizeof(CUipcMemHandle));
+  }
+
+  CUipcMemHandle ipc_handle;
+};
+
+CudaIpcMemHandle::CudaIpcMemHandle(const void* handle) {
+  impl_.reset(new CudaIpcMemHandleImpl(handle));
+}
+
+CudaIpcMemHandle::~CudaIpcMemHandle() {}
+
+Status CudaIpcMemHandle::FromBuffer(const void* opaque_handle,
+                                    std::unique_ptr<CudaIpcMemHandle>* handle) {
+  *handle = std::unique_ptr<CudaIpcMemHandle>(new CudaIpcMemHandle(opaque_handle));
+  return Status::OK();
+}
+
+Status CudaIpcMemHandle::Serialize(MemoryPool* pool, std::shared_ptr<Buffer>* out) const {
+  std::shared_ptr<MutableBuffer> buffer;
+  constexpr size_t kHandleSize = sizeof(CUipcMemHandle);
+  RETURN_NOT_OK(AllocateBuffer(pool, static_cast<int64_t>(kHandleSize), &buffer));
+  memcpy(buffer->mutable_data(), &impl_->ipc_handle, kHandleSize);
+  *out = buffer;
+  return Status::OK();
+}
+
+const void* CudaIpcMemHandle::handle() const { return &impl_->ipc_handle; }
+
+// ----------------------------------------------------------------------
+
+CudaBuffer::CudaBuffer(uint8_t* data, int64_t size,
+                       const std::shared_ptr<CudaContext>& context, bool own_data,
+                       bool is_ipc)
+    : Buffer(data, size), context_(context), own_data_(own_data), is_ipc_(is_ipc) {
+  is_mutable_ = true;
+  mutable_data_ = data;
+}
+
+CudaBuffer::~CudaBuffer() { DCHECK(Close().ok()); }
+
+Status CudaBuffer::Close() {
   if (own_data_) {
-    DCHECK(context_->Free(mutable_data_, size_).ok());
+    if (is_ipc_) {
+      CU_RETURN_NOT_OK(cuIpcCloseMemHandle(reinterpret_cast<CUdeviceptr>(mutable_data_)));
+    } else {
+      return context_->Free(mutable_data_, size_);
+    }
   }
+  return Status::OK();
 }
 
 CudaBuffer::CudaBuffer(const std::shared_ptr<CudaBuffer>& parent, const int64_t offset,
                        const int64_t size)
-    : Buffer(parent, offset, size), context_(parent->context()) {}
+    : Buffer(parent, offset, size),
+      context_(parent->context()),
+      own_data_(false),
+      is_ipc_(false) {}
 
 Status CudaBuffer::CopyToHost(const int64_t position, const int64_t nbytes,
                               uint8_t* out) const {
@@ -53,12 +110,15 @@ Status CudaBuffer::CopyFromHost(const int64_t position, const uint8_t* data,
   return context_->CopyHostToDevice(mutable_data_ + position, data, nbytes);
 }
 
-Status AllocateCudaBuffer(const int64_t size, const std::shared_ptr<CudaContext>& context,
-                          std::shared_ptr<CudaBuffer>* out) {
-  DCHECK(context);
-  uint8_t* data = nullptr;
-  RETURN_NOT_OK(context->Allocate(size, &data));
-  *out = std::make_shared<CudaBuffer>(data, size, context);
+Status CudaBuffer::ExportForIpc(std::unique_ptr<CudaIpcMemHandle>* handle) {
+  if (is_ipc_) {
+    return Status::Invalid("Buffer has already been exported for IPC");
+  }
+  CUipcMemHandle cu_handle;
+  CU_RETURN_NOT_OK(
+      cuIpcGetMemHandle(&cu_handle, reinterpret_cast<CUdeviceptr>(mutable_data_)));
+  is_ipc_ = true;
+  *handle = std::unique_ptr<CudaIpcMemHandle>(new CudaIpcMemHandle(&cu_handle));
   return Status::OK();
 }
 
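For context, a minimal usage sketch of the IPC path this diff introduces: a producer process exports a device buffer and serializes the opaque handle, and a consumer process rebuilds the handle from the received bytes. The names `buffer`, `pool`, and the transport are placeholders, and the consumer-side call that maps the handle back into a CudaBuffer lives on the CUDA context outside this diff, so it is only noted in a comment.

// Producer process: export an allocated CudaBuffer and serialize the
// CUDA IPC handle into a host buffer that can be shipped elsewhere.
// `buffer` (std::shared_ptr<CudaBuffer>) and `pool` (MemoryPool*) are
// assumed to already exist.
std::unique_ptr<CudaIpcMemHandle> ipc_handle;
RETURN_NOT_OK(buffer->ExportForIpc(&ipc_handle));

std::shared_ptr<Buffer> serialized;
RETURN_NOT_OK(ipc_handle->Serialize(pool, &serialized));
// ... send serialized->data() / serialized->size() to the consumer ...

// Consumer process: reconstruct the handle from the received bytes.
// Opening it as a CudaBuffer goes through the CUDA context and is not
// part of this diff, so that step is omitted here.
std::unique_ptr<CudaIpcMemHandle> received;
RETURN_NOT_OK(CudaIpcMemHandle::FromBuffer(serialized->data(), &received));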