Check for support of CUDA Memory Pools at runtime (#4679)

Some CUDA GPUs, like the Quadro M3000M don't support Memory Pools operations like cudaMallocAsync/cudaFreeAsync even on driver versions newer than 11.2, and this can result in errors like: CUDA runtime error: operation not supported So check for support at runtime instead of compile time. Still keep the compile time check to support building with CUDA versions older than 11.2.
isl-org · Oct 28, 2023 · 25c31f0 · 25c31f0
1 parent 881ae76
commit 25c31f0
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 3 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,7 @@
 * Fix some bad triangle generation in TriangleMesh::SimplifyQuadricDecimation
 * Fix printing of tensor in gpu and add validation check for bounds of axis-aligned bounding box (PR #6444) 
 * Python 3.11 support. bump pybind11 v2.6.2 -> v2.11.1
+* Check for support of CUDA Memory Pools at runtime (#4679)
 
 ## 0.13
 

diff --git a/cpp/open3d/core/CUDAUtils.cpp b/cpp/open3d/core/CUDAUtils.cpp
@@ -108,6 +108,27 @@ void AssertCUDADeviceAvailable(const Device& device) {
  }
 }
 
+bool SupportsMemoryPools(const Device& device) {
+#ifdef BUILD_CUDA_MODULE
+ if (device.IsCUDA()) {
+ int driverVersion = 0;
+ int deviceSupportsMemoryPools = 0;
+ OPEN3D_CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
+ if (driverVersion >=
+ 11020) { // avoid invalid value error in cudaDeviceGetAttribute
+ OPEN3D_CUDA_CHECK(cudaDeviceGetAttribute(
+ &deviceSupportsMemoryPools, cudaDevAttrMemoryPoolsSupported,
+ device.GetID()));
+ }
+ return !!deviceSupportsMemoryPools;
+ } else {
+ return false;
+ }
+#else
+ return false;
+#endif
+}
+
 #ifdef BUILD_CUDA_MODULE
 int GetDevice() {
  int device;

diff --git a/cpp/open3d/core/CUDAUtils.h b/cpp/open3d/core/CUDAUtils.h
@@ -255,6 +255,13 @@ void AssertCUDADeviceAvailable(int device_id);
 /// \param device The device to be checked.
 void AssertCUDADeviceAvailable(const Device& device);
 
+/// Checks if the CUDA device support Memory Pools
+/// used by the Stream Ordered Memory Allocator,
+/// see
+/// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
+/// \param device The device to be checked.
+bool SupportsMemoryPools(const Device& device);
+
 #ifdef BUILD_CUDA_MODULE
 
 int GetDevice();

diff --git a/cpp/open3d/core/MemoryManagerCUDA.cpp b/cpp/open3d/core/MemoryManagerCUDA.cpp
@@ -20,8 +20,12 @@ void* MemoryManagerCUDA::Malloc(size_t byte_size, const Device& device) {
  void* ptr;
  if (device.IsCUDA()) {
 #if CUDART_VERSION >= 11020
- OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr), byte_size,
- cuda::GetStream()));
+ if (cuda::SupportsMemoryPools(device)) {
+ OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr),
+ byte_size, cuda::GetStream()));
+ } else {
+ OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
+ }
 #else
  OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
 #endif
@@ -38,7 +42,11 @@ void MemoryManagerCUDA::Free(void* ptr, const Device& device) {
  if (device.IsCUDA()) {
  if (ptr && IsCUDAPointer(ptr, device)) {
 #if CUDART_VERSION >= 11020
- OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
+ if (cuda::SupportsMemoryPools(device)) {
+ OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
+ } else {
+ OPEN3D_CUDA_CHECK(cudaFree(ptr));
+ }
 #else
  OPEN3D_CUDA_CHECK(cudaFree(ptr));
 #endif