@@ -12,15 +12,21 @@ extern "C" {
1212#include  < cuda_runtime_api.h> 
1313#include  < cuda.h> 
1414
15- #define  CUDA_CHECK (condition )                                                  \
16-   do  {                                                                         \
17-     CUresult error = condition;                                                \
18-     if  (error != 0 ) {                                                          \
19-       char * error_string;                                                      \
20-       cuGetErrorString (error, (const  char **)&error_string);                    \
21-       std::cerr << " CUDA Error: "   << error_string << "  at "   << __FILE__ << " :"   \
22-                 << __LINE__ << std::endl;                                      \
23-     }                                                                          \
15+ char  error_msg[10240 ];  //  10KB buffer to store error messages
16+ CUresult no_error = CUresult(0 );
17+ CUresult error_code = no_error;  //  store error code
18+ 
19+ #define  CUDA_CHECK (condition )                                           \
20+   do  {                                                                  \
21+     CUresult error = condition;                                         \
22+     if  (error != 0 ) {                                                   \
23+       error_code = error;                                               \
24+       char * error_string;                                               \
25+       cuGetErrorString (error, (const  char **)&error_string);             \
26+       snprintf (error_msg, sizeof (error_msg), " CUDA Error: %s at %s:%d"  , \
27+                error_string, __FILE__, __LINE__);                       \
28+       std::cerr << error_msg << std::endl;                              \
29+     }                                                                   \
2430  } while  (0 )
2531
2632//  Global references to Python callables
@@ -54,14 +60,22 @@ void create_and_map(unsigned long long device, ssize_t size, CUdeviceptr d_mem,
5460
5561  //  Allocate memory using cuMemCreate
5662  CUDA_CHECK (cuMemCreate (p_memHandle, size, &prop, 0 ));
63+   if  (error_code != 0 ) {
64+     return ;
65+   }
5766  CUDA_CHECK (cuMemMap (d_mem, size, 0 , *p_memHandle, 0 ));
58- 
67+   if  (error_code != 0 ) {
68+     return ;
69+   }
5970  CUmemAccessDesc accessDesc = {};
6071  accessDesc.location .type  = CU_MEM_LOCATION_TYPE_DEVICE;
6172  accessDesc.location .id  = device;
6273  accessDesc.flags  = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
6374
6475  CUDA_CHECK (cuMemSetAccess (d_mem, size, &accessDesc, 1 ));
76+   if  (error_code != 0 ) {
77+     return ;
78+   }
6579  //  std::cout << "create_and_map: device=" << device << ", size=" << size << ",
6680  //  d_mem=" << d_mem << ", p_memHandle=" << p_memHandle << std::endl;
6781}
@@ -73,7 +87,13 @@ void unmap_and_release(unsigned long long device, ssize_t size,
7387  //  ", d_mem=" << d_mem << ", p_memHandle=" << p_memHandle << std::endl;
7488  ensure_context (device);
7589  CUDA_CHECK (cuMemUnmap (d_mem, size));
90+   if  (error_code != 0 ) {
91+     return ;
92+   }
7693  CUDA_CHECK (cuMemRelease (*p_memHandle));
94+   if  (error_code != 0 ) {
95+     return ;
96+   }
7797}
7898
7999PyObject* create_tuple_from_c_integers (unsigned  long  long  a,
@@ -121,12 +141,16 @@ void* my_malloc(ssize_t size, int device, CUstream stream) {
121141  size_t  granularity;
122142  CUDA_CHECK (cuMemGetAllocationGranularity (&granularity, &prop,
123143                                           CU_MEM_ALLOC_GRANULARITY_MINIMUM));
124- 
144+   if  (error_code != 0 ) {
145+     return  nullptr ;
146+   }
125147  size_t  alignedSize = ((size + granularity - 1 ) / granularity) * granularity;
126148
127149  CUdeviceptr d_mem;
128150  CUDA_CHECK (cuMemAddressReserve (&d_mem, alignedSize, 0 , 0 , 0 ));
129- 
151+   if  (error_code != 0 ) {
152+     return  nullptr ;
153+   }
130154  //  allocate the CUmemGenericAllocationHandle
131155  CUmemGenericAllocationHandle* p_memHandle =
132156      (CUmemGenericAllocationHandle*)malloc (
@@ -208,6 +232,9 @@ void my_free(void* ptr, ssize_t size, int device, CUstream stream) {
208232
209233  //  free address and the handle
210234  CUDA_CHECK (cuMemAddressFree (d_mem, size));
235+   if  (error_code != 0 ) {
236+     return ;
237+   }
211238  free (p_memHandle);
212239}
213240
@@ -258,6 +285,12 @@ static PyObject* python_unmap_and_release(PyObject* self, PyObject* args) {
258285
259286  unmap_and_release (recv_device, recv_size, d_mem_ptr, p_memHandle);
260287
288+   if  (error_code != 0 ) {
289+     error_code = no_error;
290+     PyErr_SetString (PyExc_RuntimeError, error_msg);
291+     return  nullptr ;
292+   }
293+ 
261294  Py_RETURN_NONE;
262295}
263296
@@ -282,6 +315,12 @@ static PyObject* python_create_and_map(PyObject* self, PyObject* args) {
282315
283316  create_and_map (recv_device, recv_size, d_mem_ptr, p_memHandle);
284317
318+   if  (error_code != 0 ) {
319+     error_code = no_error;
320+     PyErr_SetString (PyExc_RuntimeError, error_msg);
321+     return  nullptr ;
322+   }
323+ 
285324  Py_RETURN_NONE;
286325}
287326
0 commit comments