From eca1c64ace26026b6d5fcd3ba4c894f60fd0aef9 Mon Sep 17 00:00:00 2001 From: Michael Demoret <42954918+mdemoret-nv@users.noreply.github.com> Date: Mon, 14 Dec 2020 15:11:43 -0700 Subject: [PATCH 1/6] Adding references to DeviceBuffer to ensure necessary MR and Stream lifetime --- python/rmm/_lib/device_buffer.pxd | 10 ++++++++++ python/rmm/_lib/device_buffer.pyx | 6 ++++++ python/rmm/_lib/memory_resource.pxd | 2 ++ python/rmm/_lib/memory_resource.pyx | 2 +- python/rmm/tests/test_rmm.py | 25 +++++++++++++++++++++++++ 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 98a79d1da..76de9da45 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -17,6 +17,7 @@ from libc.stdint cimport uintptr_t from rmm._lib.cuda_stream_view cimport cuda_stream_view from rmm._cuda.stream cimport Stream +from rmm._lib.memory_resource cimport MemoryResource cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: @@ -38,6 +39,15 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj + # Holds a reference to the MemoryResource used for allocation. Ensures the + # MR does not get destroyed before this DeviceBuffer. `mr` is needed for + # deallocation + cdef MemoryResource mr + + # Holds a reference to the stream used by the underlying `device_buffer`. Ensures the + # stream does not get destroyed before this DeviceBuffer + cdef Stream stream + @staticmethod cdef DeviceBuffer c_from_unique_ptr(unique_ptr[device_buffer] ptr) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 5e19e65a3..d7b172fe9 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -32,6 +32,7 @@ from rmm._lib.lib cimport ( cudaStream_t, cudaStreamSynchronize, ) +from rmm._lib.memory_resource cimport get_current_device_resource cdef class DeviceBuffer: @@ -80,6 +81,11 @@ cdef class DeviceBuffer: if stream.c_is_default(): stream.c_synchronize() + + # Save a reference to the MR and stream used for allocation + self.mr = get_current_device_resource() + self.stream = stream + def __len__(self): return self.size diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index 9729b058f..66ef60377 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -102,3 +102,5 @@ cdef class LoggingResourceAdaptor(MemoryResource): cdef object _log_file_name cpdef get_file_name(self) cpdef flush(self) + +cpdef MemoryResource get_current_device_resource() diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 20d9931cf..4f9fd57ff 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -430,7 +430,7 @@ cpdef get_per_device_resource_type(int device): return type(get_per_device_resource(device)) -cpdef get_current_device_resource(): +cpdef MemoryResource get_current_device_resource(): """ Get the memory resource used for RMM device allocations on the current device. diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 20755908b..74a8439b9 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -8,6 +8,7 @@ from numba import cuda import rmm +import rmm._cuda.stream if sys.version_info < (3, 8): try: @@ -378,3 +379,27 @@ def test_reinitialize_initial_pool_size_gt_max(): maximum_pool_size=1 << 10, ) assert "Initial pool size exceeds the maximum pool size" in str(e.value) + +def test_mr_object_lifetime(): + # Test ensures MR/Stream lifetime is longer than DeviceBuffer. Even if all references go out of scope + import gc + + # Create new Pool MR + rmm.mr.set_current_device_resource(rmm.mr.PoolMemoryResource(rmm.mr.get_current_device_resource())) + + # Creates a new non-default stream + stream = rmm._cuda.stream.Stream() + + # Allocate DeviceBuffer with Pool and Stream + a = rmm.DeviceBuffer(size=10, stream=stream) + + # Change current MR. Will cause Pool to go out of scope + rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource()) + c = rmm.DeviceBuffer(size=10) + + # Force collection to ensure objects are cleaned up + gc.collect() + + # Delete a. Used to crash before. Pool MR should still be alive + del a + From 5164d035fb192557efa65197295c7d84daf0c8c6 Mon Sep 17 00:00:00 2001 From: Michael Demoret <42954918+mdemoret-nv@users.noreply.github.com> Date: Mon, 14 Dec 2020 20:44:51 -0700 Subject: [PATCH 2/6] Style Cleanup --- python/rmm/_lib/device_buffer.pxd | 4 ++-- python/rmm/_lib/device_buffer.pyx | 5 ++--- python/rmm/tests/test_rmm.py | 13 +++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 76de9da45..9d18be8f5 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -44,8 +44,8 @@ cdef class DeviceBuffer: # deallocation cdef MemoryResource mr - # Holds a reference to the stream used by the underlying `device_buffer`. Ensures the - # stream does not get destroyed before this DeviceBuffer + # Holds a reference to the stream used by the underlying `device_buffer`. + # Ensures the stream does not get destroyed before this DeviceBuffer cdef Stream stream @staticmethod diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index d7b172fe9..0a65049f9 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -34,7 +34,7 @@ from rmm._lib.lib cimport ( ) from rmm._lib.memory_resource cimport get_current_device_resource - +@cython.no_gc cdef class DeviceBuffer: def __cinit__(self, *, @@ -81,12 +81,11 @@ cdef class DeviceBuffer: if stream.c_is_default(): stream.c_synchronize() - + # Save a reference to the MR and stream used for allocation self.mr = get_current_device_resource() self.stream = stream - def __len__(self): return self.size diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 74a8439b9..18b239300 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -1,4 +1,5 @@ # Copyright (c) 2020, NVIDIA CORPORATION. +import gc import os import sys from itertools import product @@ -380,12 +381,14 @@ def test_reinitialize_initial_pool_size_gt_max(): ) assert "Initial pool size exceeds the maximum pool size" in str(e.value) -def test_mr_object_lifetime(): - # Test ensures MR/Stream lifetime is longer than DeviceBuffer. Even if all references go out of scope - import gc +def test_mr_devicebuffer_lifetime(): + # Test ensures MR/Stream lifetime is longer than DeviceBuffer. Even if all + # references go out of scope # Create new Pool MR - rmm.mr.set_current_device_resource(rmm.mr.PoolMemoryResource(rmm.mr.get_current_device_resource())) + rmm.mr.set_current_device_resource( + rmm.mr.PoolMemoryResource(rmm.mr.get_current_device_resource()) + ) # Creates a new non-default stream stream = rmm._cuda.stream.Stream() @@ -395,11 +398,9 @@ def test_mr_object_lifetime(): # Change current MR. Will cause Pool to go out of scope rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource()) - c = rmm.DeviceBuffer(size=10) # Force collection to ensure objects are cleaned up gc.collect() # Delete a. Used to crash before. Pool MR should still be alive del a - From 928080058689fab79aacd6a2327c001c4642c775 Mon Sep 17 00:00:00 2001 From: Michael Demoret <42954918+mdemoret-nv@users.noreply.github.com> Date: Mon, 14 Dec 2020 20:51:49 -0700 Subject: [PATCH 3/6] Removing no_gc from DeviceBuffer --- python/rmm/_lib/device_buffer.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 0a65049f9..cc79408af 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -34,7 +34,6 @@ from rmm._lib.lib cimport ( ) from rmm._lib.memory_resource cimport get_current_device_resource -@cython.no_gc cdef class DeviceBuffer: def __cinit__(self, *, From abcab0519f3bbad73988962b227723ffe01dd6be Mon Sep 17 00:00:00 2001 From: Michael Demoret <42954918+mdemoret-nv@users.noreply.github.com> Date: Mon, 14 Dec 2020 21:20:47 -0700 Subject: [PATCH 4/6] Running isort --- python/rmm/_lib/device_buffer.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index cc79408af..80da32f1b 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -22,7 +22,9 @@ from libcpp.utility cimport move from rmm._cuda.gpu cimport cudaError, cudaError_t from rmm._cuda.stream cimport Stream + from rmm._cuda.stream import DEFAULT_STREAM + from rmm._lib.lib cimport ( cudaMemcpyAsync, cudaMemcpyDeviceToDevice, @@ -34,6 +36,7 @@ from rmm._lib.lib cimport ( ) from rmm._lib.memory_resource cimport get_current_device_resource + cdef class DeviceBuffer: def __cinit__(self, *, From 623704da5246ba0b7f97376db35416322843d3f1 Mon Sep 17 00:00:00 2001 From: Michael Demoret <42954918+mdemoret-nv@users.noreply.github.com> Date: Mon, 14 Dec 2020 23:42:53 -0700 Subject: [PATCH 5/6] Add pytest fixture to auto reinitialize after every test. --- python/rmm/tests/test_rmm.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 18b239300..a1facb913 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -22,6 +22,17 @@ cuda.set_memory_manager(rmm.RMMNumbaManager) +@pytest.fixture(scope="function", autouse=True) +def rmm_auto_reinitialize(): + + # Run the test + yield + + # Automatically reinitialize the current memory resource after running each + # test + rmm.reinitialize() + + def array_tester(dtype, nelem, alloc): # data h_in = np.full(nelem, 3.2, dtype) @@ -72,7 +83,6 @@ def test_rmm_modes(dtype, nelem, alloc, managed, pool): assert rmm.is_initialized() array_tester(dtype, nelem, alloc) - rmm.reinitialize() @pytest.mark.parametrize("dtype", _dtypes) @@ -94,7 +104,6 @@ def test_rmm_csv_log(dtype, nelem, alloc, tmpdir): assert csv.find(b"Time,Action,Pointer,Size,Stream") >= 0 finally: os.remove(fname) - rmm.reinitialize() @pytest.mark.parametrize("size", [0, 5]) @@ -301,7 +310,6 @@ def test_pool_memory_resource(dtype, nelem, alloc): rmm.mr.set_current_device_resource(mr) assert rmm.mr.get_current_device_resource_type() is type(mr) array_tester(dtype, nelem, alloc) - rmm.reinitialize() @pytest.mark.parametrize("dtype", _dtypes) @@ -321,7 +329,6 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): rmm.mr.set_current_device_resource(mr) assert rmm.mr.get_current_device_resource_type() is type(mr) array_tester(dtype, nelem, alloc) - rmm.reinitialize() @pytest.mark.parametrize("dtype", _dtypes) @@ -352,7 +359,6 @@ def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr): rmm.mr.set_current_device_resource(mr) assert rmm.mr.get_current_device_resource_type() is type(mr) array_tester(dtype, nelem, alloc) - rmm.reinitialize() def test_reinitialize_max_pool_size(): @@ -360,7 +366,6 @@ def test_reinitialize_max_pool_size(): pool_allocator=True, initial_pool_size=0, maximum_pool_size=1 << 23 ) rmm.DeviceBuffer().resize((1 << 23) - 1) - rmm.reinitialize() def test_reinitialize_max_pool_size_exceeded(): @@ -369,7 +374,6 @@ def test_reinitialize_max_pool_size_exceeded(): ) with pytest.raises(MemoryError): rmm.DeviceBuffer().resize(1 << 24) - rmm.reinitialize() def test_reinitialize_initial_pool_size_gt_max(): From 11ac17ab0ba33030dad5243855d7842eb264a9fb Mon Sep 17 00:00:00 2001 From: Michael Demoret <42954918+mdemoret-nv@users.noreply.github.com> Date: Tue, 23 Feb 2021 17:50:25 -0700 Subject: [PATCH 6/6] Fixing `__sizeof__` comparison on failing test. --- python/rmm/tests/test_rmm.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index a8b02f0c0..dfd5b7264 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -120,7 +120,7 @@ def test_rmm_device_buffer(size): assert len(b) == b.size assert b.nbytes == b.size assert b.capacity() >= b.size - assert sys.getsizeof(b) == b.size + assert b.__sizeof__() == b.size # Test `__cuda_array_interface__` keyset = {"data", "shape", "strides", "typestr", "version"} @@ -384,7 +384,6 @@ def test_reinitialize_initial_pool_size_gt_max(): maximum_pool_size=1 << 10, ) assert "Initial pool size exceeds the maximum pool size" in str(e.value) - rmm.reinitialize() def test_mr_devicebuffer_lifetime(): @@ -434,5 +433,3 @@ def test_rmm_enable_disable_logging(dtype, nelem, alloc, tmpdir): os.remove(fname) rmm.disable_logging() - rmm.reinitialize() -