From 1375369863065af583da75a2afaac771b75a8e92 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 9 Jul 2024 15:13:51 -0700 Subject: [PATCH 01/11] add python support for system mr --- python/rmm/rmm/_lib/memory_resource.pyx | 97 +++++++++++++++++++++---- python/rmm/rmm/rmm.py | 12 +++ 2 files changed, 95 insertions(+), 14 deletions(-) diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx index e181bff4c..17aafe31b 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/_lib/memory_resource.pyx @@ -94,6 +94,11 @@ cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \ cdef cppclass managed_memory_resource(device_memory_resource): managed_memory_resource() except + +cdef extern from "rmm/mr/device/system_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass system_memory_resource(device_memory_resource): + system_memory_resource() except + + cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ namespace "rmm::mr" nogil: @@ -170,6 +175,13 @@ cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \ size_t get_allocated_bytes() except + size_t get_allocation_limit() except + +cdef extern from "rmm/mr/device/sam_headroom_resource_adaptor.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass sam_headroom_resource_adaptor[Upstream](device_memory_resource): + sam_headroom_resource_adaptor( + Upstream* upstream_mr, + size_t headroom) except + + cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ namespace "rmm::mr" nogil: cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): @@ -361,6 +373,20 @@ cdef class ManagedMemoryResource(DeviceMemoryResource): pass +cdef class SystemMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new system_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. + """ + pass + + cdef class PoolMemoryResource(UpstreamResourceAdaptor): def __cinit__( @@ -718,6 +744,40 @@ cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): self.c_obj.get()) )[0].get_allocation_limit() +cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor): + def __cinit__( + self, + DeviceMemoryResource upstream_mr, + size_t headroom + ): + if not isinstance(upstream_mr, SystemMemoryResource): + raise TypeError("SamHeadroomResourceAdaptor requires a SystemMemoryResource") + cdef system_memory_resource *sys_mr = upstream_mr.get_mr() + self.c_obj.reset( + new sam_headroom_resource_adaptor[system_memory_resource]( + sys_mr, + headroom + ) + ) + + def __init__( + self, + DeviceMemoryResource upstream_mr, + size_t headroom + ): + """ + Memory resource that adapts system memory resource to allocate memory + with a headroom. + + Parameters + ---------- + upstream_mr : DeviceMemoryResource + The upstream memory resource. 
+ headroom : size_t + Size of the reserved GPU memory as headroom + """ + pass + cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): def __cinit__( @@ -995,8 +1055,10 @@ cdef _per_device_mrs = defaultdict(CudaMemoryResource) cpdef void _initialize( bool pool_allocator=False, bool managed_memory=False, + bool system_memory=False, object initial_pool_size=None, object maximum_pool_size=None, + object system_memory_headroom_size=None, object devices=0, bool logging=False, object log_file_name=None, @@ -1006,21 +1068,11 @@ cpdef void _initialize( """ if managed_memory: upstream = ManagedMemoryResource + elif system_memory: + upstream = SystemMemoryResource else: upstream = CudaMemoryResource - if pool_allocator: - typ = PoolMemoryResource - args = (upstream(),) - kwargs = dict( - initial_pool_size=initial_pool_size, - maximum_pool_size=maximum_pool_size - ) - else: - typ = upstream - args = () - kwargs = {} - cdef DeviceMemoryResource mr cdef int original_device @@ -1046,13 +1098,30 @@ cpdef void _initialize( for device in devices: setDevice(device) + base_mr = upstream() + + if system_memory and system_memory_headroom_size is not None: + base_mr = SamHeadroomResourceAdaptor( + base_mr, + system_memory_headroom_size + ) + else: + base_mr = upstream() + + if pool_allocator: + base_mr = PoolMemoryResource( + base_mr, + initial_pool_size=initial_pool_size, + maximum_pool_size=maximum_pool_size + ) + if logging: mr = LoggingResourceAdaptor( - typ(*args, **kwargs), + base_mr, log_file_name ) else: - mr = typ(*args, **kwargs) + mr = base_mr set_per_device_resource(device, mr) diff --git a/python/rmm/rmm/rmm.py b/python/rmm/rmm/rmm.py index e5290905c..faede5e62 100644 --- a/python/rmm/rmm/rmm.py +++ b/python/rmm/rmm/rmm.py @@ -27,8 +27,10 @@ def __init__(self, errcode, msg): def reinitialize( pool_allocator=False, managed_memory=False, + system_memory=False, initial_pool_size=None, maximum_pool_size=None, + system_memory_headroom_size=None, devices=0, logging=False, log_file_name=None, @@ -45,6 +47,8 @@ def reinitialize( performance. managed_memory : bool, default False If True, use managed memory for device memory allocation + system_memory : bool, default False + If True, use system allocated memory for device memory allocation initial_pool_size : int, default None When `pool_allocator` is True, this indicates the initial pool size in bytes. By default, 1/2 of the total GPU memory is used. @@ -53,6 +57,12 @@ def reinitialize( When `pool_allocator` is True, this indicates the maximum pool size in bytes. By default, the total available memory on the GPU is used. When `pool_allocator` is False, this argument is ignored if provided. + system_memory_headroom_size : int, default None + When `system_memory` is True, this indicates the headroom size in bytes + to be reserved for CUDA calls not using system memory. By default, the + headroom is 0 and the total available memory on the GPU can be used by + system memory. + When `system_allocator` is False, this argument is ignored if provided. devices : int or List[int], default 0 GPU device IDs to register. By default registers only GPU 0. 
logging : bool, default False @@ -81,8 +91,10 @@ def reinitialize( mr._initialize( pool_allocator=pool_allocator, managed_memory=managed_memory, + system_memory=system_memory, initial_pool_size=initial_pool_size, maximum_pool_size=maximum_pool_size, + system_memory_headroom_size=system_memory_headroom_size, devices=devices, logging=logging, log_file_name=log_file_name, From 4c9bf49a936b860b463abea925c308c70b359383 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 9 Jul 2024 16:33:36 -0700 Subject: [PATCH 02/11] managed and system are mutually exclusive --- python/rmm/rmm/_lib/memory_resource.pxd | 6 ++++++ python/rmm/rmm/_lib/memory_resource.pyx | 3 +++ python/rmm/rmm/rmm.py | 3 +++ python/rmm/rmm/tests/test_rmm.py | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 15ddc84f6..d5433b6e5 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -52,6 +52,9 @@ cdef class CudaMemoryResource(DeviceMemoryResource): cdef class ManagedMemoryResource(DeviceMemoryResource): pass +cdef class SystemMemoryResource(DeviceMemoryResource): + pass + cdef class CudaAsyncMemoryResource(DeviceMemoryResource): pass @@ -77,6 +80,9 @@ cdef class CallbackMemoryResource(DeviceMemoryResource): cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): pass +cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor): + pass + cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): cdef object _log_file_name cpdef get_file_name(self) diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx index 17aafe31b..20821ef99 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/_lib/memory_resource.pyx @@ -1066,6 +1066,9 @@ cpdef void _initialize( """ Initializes RMM library using the options passed """ + if managed_memory and system_memory: + raise ValueError("managed_memory and system_memory cannot both be True") + if managed_memory: upstream = ManagedMemoryResource elif system_memory: diff --git a/python/rmm/rmm/rmm.py b/python/rmm/rmm/rmm.py index faede5e62..984dc3362 100644 --- a/python/rmm/rmm/rmm.py +++ b/python/rmm/rmm/rmm.py @@ -85,6 +85,9 @@ def reinitialize( with device ID ``1``. Use `rmm.get_log_filenames()` to get the log file names corresponding to each device. 
""" + if managed_memory and system_memory: + raise ValueError("managed_memory and system_memory cannot both be True") + for func, args, kwargs in reversed(_reinitialize_hooks): func(*args, **kwargs) diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 05ffcf832..1f74657f2 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -38,6 +38,10 @@ _runtime_version >= 11020 ) +_SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute( + cudart.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess, + rmm._cuda.gpu.getDevice() +) def array_tester(dtype, nelem, alloc): # data From 48e16a9db706a9561206cd9364be2f17ca3db989 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 10 Jul 2024 14:01:31 -0700 Subject: [PATCH 03/11] add tests for system mr --- python/rmm/rmm/mr.py | 2 ++ python/rmm/rmm/tests/test_rmm.py | 35 ++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index b6ae4e6cd..84453d9eb 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -23,7 +23,9 @@ LoggingResourceAdaptor, ManagedMemoryResource, PoolMemoryResource, + SamHeadroomResourceAdaptor, StatisticsResourceAdaptor, + SystemMemoryResource, TrackingResourceAdaptor, UpstreamResourceAdaptor, _flush_logs, diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 1f74657f2..a4bac0276 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -95,6 +95,27 @@ def test_rmm_modes(dtype, nelem, alloc, managed, pool): array_tester(dtype, nelem, alloc) +@pytest.mark.skipif( + not _SYSTEM_MEMORY_SUPPORTED, + reason="System memory not supported", +) +@pytest.mark.parametrize("dtype", _dtypes) +@pytest.mark.parametrize("nelem", _nelems) +@pytest.mark.parametrize("alloc", _allocs) +@pytest.mark.parametrize( + "system, pool", list(product([False, True], [False, True])) +) +def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool): + assert rmm.is_initialized() + array_tester(dtype, nelem, alloc) + + rmm.reinitialize(pool_allocator=pool, system_memory=system) + + assert rmm.is_initialized() + + array_tester(dtype, nelem, alloc) + + @pytest.mark.parametrize("dtype", _dtypes) @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) @@ -414,7 +435,9 @@ def test_pool_memory_resource(dtype, nelem, alloc): [ lambda: rmm.mr.CudaMemoryResource(), lambda: rmm.mr.ManagedMemoryResource(), - ], + ] + ( + [lambda: rmm.mr.SystemMemoryResource()] if _SYSTEM_MEMORY_SUPPORTED else [] + ) ) def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): mr = rmm.mr.FixedSizeMemoryResource( @@ -436,7 +459,9 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): lambda: rmm.mr.PoolMemoryResource( rmm.mr.CudaMemoryResource(), 1 << 20 ), - ], + ] + ( + [lambda: rmm.mr.SystemMemoryResource()] if _SYSTEM_MEMORY_SUPPORTED else [] + ) ) def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr): upstream = upstream_mr() @@ -480,6 +505,12 @@ def test_reinitialize_initial_pool_size_gt_max(): assert "Initial pool size exceeds the maximum pool size" in str(e.value) +def test_reinitialize_both_managed_and_system(): + with pytest.raises(ValueError) as e: + rmm.reinitialize(managed_memory=True, system_memory=True) + assert "managed_memory and system_memory cannot both be True" in str(e.value) + + @pytest.mark.parametrize("dtype", _dtypes) @pytest.mark.parametrize("nelem", _nelems) 
@pytest.mark.parametrize("alloc", _allocs) From 6deb6fb2c23505e78fbd6e7e817aff38e337aa5c Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 10 Jul 2024 14:20:34 -0700 Subject: [PATCH 04/11] fix lint errors --- python/rmm/rmm/_lib/memory_resource.pyx | 7 +++++-- python/rmm/rmm/mr.py | 2 ++ python/rmm/rmm/rmm.py | 2 +- python/rmm/rmm/tests/test_rmm.py | 12 +++++++++--- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx index 20821ef99..6d784b48d 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/_lib/memory_resource.pyx @@ -751,8 +751,11 @@ cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor): size_t headroom ): if not isinstance(upstream_mr, SystemMemoryResource): - raise TypeError("SamHeadroomResourceAdaptor requires a SystemMemoryResource") - cdef system_memory_resource *sys_mr = upstream_mr.get_mr() + raise TypeError( + "SamHeadroomResourceAdaptor requires a SystemMemoryResource" + ) + cdef system_memory_resource *sys_mr =\ + upstream_mr.get_mr() self.c_obj.reset( new sam_headroom_resource_adaptor[system_memory_resource]( sys_mr, diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index 84453d9eb..92aaa992f 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -54,7 +54,9 @@ "LoggingResourceAdaptor", "ManagedMemoryResource", "PoolMemoryResource", + "SamHeadroomResourceAdaptor", "StatisticsResourceAdaptor", + "SystemMemoryResource", "TrackingResourceAdaptor", "FailureCallbackResourceAdaptor", "UpstreamResourceAdaptor", diff --git a/python/rmm/rmm/rmm.py b/python/rmm/rmm/rmm.py index 984dc3362..3d2c8cd57 100644 --- a/python/rmm/rmm/rmm.py +++ b/python/rmm/rmm/rmm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index a4bac0276..5440c655f 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -436,7 +436,9 @@ def test_pool_memory_resource(dtype, nelem, alloc): lambda: rmm.mr.CudaMemoryResource(), lambda: rmm.mr.ManagedMemoryResource(), ] + ( - [lambda: rmm.mr.SystemMemoryResource()] if _SYSTEM_MEMORY_SUPPORTED else [] + [lambda: rmm.mr.SystemMemoryResource()] + if _SYSTEM_MEMORY_SUPPORTED + else [] ) ) def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): @@ -460,7 +462,9 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): rmm.mr.CudaMemoryResource(), 1 << 20 ), ] + ( - [lambda: rmm.mr.SystemMemoryResource()] if _SYSTEM_MEMORY_SUPPORTED else [] + [lambda: rmm.mr.SystemMemoryResource()] + if _SYSTEM_MEMORY_SUPPORTED + else [] ) ) def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr): @@ -508,7 +512,9 @@ def test_reinitialize_initial_pool_size_gt_max(): def test_reinitialize_both_managed_and_system(): with pytest.raises(ValueError) as e: rmm.reinitialize(managed_memory=True, system_memory=True) - assert "managed_memory and system_memory cannot both be True" in str(e.value) + assert "managed_memory and system_memory cannot both be True" in str( + e.value + ) @pytest.mark.parametrize("dtype", _dtypes) From 283ee018094c517c192ca94b15df9a4b09857271 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 10 Jul 2024 14:27:26 -0700 Subject: [PATCH 05/11] reformat --- python/rmm/rmm/rmm.py | 4 +++- python/rmm/rmm/tests/test_rmm.py | 13 ++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python/rmm/rmm/rmm.py b/python/rmm/rmm/rmm.py index 3d2c8cd57..22d3e0873 100644 --- a/python/rmm/rmm/rmm.py +++ b/python/rmm/rmm/rmm.py @@ -86,7 +86,9 @@ def reinitialize( names corresponding to each device. 
""" if managed_memory and system_memory: - raise ValueError("managed_memory and system_memory cannot both be True") + raise ValueError( + "managed_memory and system_memory cannot both be True" + ) for func, args, kwargs in reversed(_reinitialize_hooks): func(*args, **kwargs) diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 5440c655f..443d625c5 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -40,9 +40,10 @@ _SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute( cudart.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess, - rmm._cuda.gpu.getDevice() + rmm._cuda.gpu.getDevice(), ) + def array_tester(dtype, nelem, alloc): # data h_in = np.full(nelem, 3.2, dtype) @@ -435,11 +436,12 @@ def test_pool_memory_resource(dtype, nelem, alloc): [ lambda: rmm.mr.CudaMemoryResource(), lambda: rmm.mr.ManagedMemoryResource(), - ] + ( + ] + + ( [lambda: rmm.mr.SystemMemoryResource()] if _SYSTEM_MEMORY_SUPPORTED else [] - ) + ), ) def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): mr = rmm.mr.FixedSizeMemoryResource( @@ -461,11 +463,12 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): lambda: rmm.mr.PoolMemoryResource( rmm.mr.CudaMemoryResource(), 1 << 20 ), - ] + ( + ] + + ( [lambda: rmm.mr.SystemMemoryResource()] if _SYSTEM_MEMORY_SUPPORTED else [] - ) + ), ) def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr): upstream = upstream_mr() From 35daa579bd4c2699eb76e30739c5229fde540930 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 23 Jul 2024 12:46:21 -0700 Subject: [PATCH 06/11] hard code system mr in sam headroom adaptor --- python/rmm/rmm/_lib/memory_resource.pyx | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx index fbbc40adf..3e5aaec74 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/_lib/memory_resource.pyx @@ -752,25 +752,18 @@ cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor): def __cinit__( self, - DeviceMemoryResource upstream_mr, size_t headroom ): - if not isinstance(upstream_mr, SystemMemoryResource): - raise TypeError( - "SamHeadroomResourceAdaptor requires a SystemMemoryResource" - ) - cdef system_memory_resource *sys_mr =\ - upstream_mr.get_mr() + self.upstream_mr = SystemMemoryResource() self.c_obj.reset( new sam_headroom_resource_adaptor[system_memory_resource]( - sys_mr, + self.upstream_mr.get_mr(), headroom ) ) def __init__( self, - DeviceMemoryResource upstream_mr, size_t headroom ): """ @@ -779,8 +772,6 @@ cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor): Parameters ---------- - upstream_mr : DeviceMemoryResource - The upstream memory resource. 
headroom : size_t Size of the reserved GPU memory as headroom """ @@ -1135,11 +1126,8 @@ cpdef void _initialize( for device in devices: setDevice(device) - base_mr = upstream() - if system_memory and system_memory_headroom_size is not None: base_mr = SamHeadroomResourceAdaptor( - base_mr, system_memory_headroom_size ) else: From d1320da169dc1ed0db5218106508e8937c8cca8c Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 23 Jul 2024 18:36:55 -0700 Subject: [PATCH 07/11] revert change to reinitialize --- python/rmm/rmm/_lib/memory_resource.pxd | 6 +- python/rmm/rmm/_lib/memory_resource.pyx | 94 +++++++++++-------------- python/rmm/rmm/rmm.py | 17 ----- python/rmm/rmm/tests/test_rmm.py | 30 ++++---- 4 files changed, 63 insertions(+), 84 deletions(-) diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 5d3f833e8..75754b85c 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -55,6 +55,9 @@ cdef class ManagedMemoryResource(DeviceMemoryResource): cdef class SystemMemoryResource(DeviceMemoryResource): pass +cdef class SamHeadroomResourceAdaptor(DeviceMemoryResource): + cdef readonly DeviceMemoryResource system_mr + cdef class CudaAsyncMemoryResource(DeviceMemoryResource): pass @@ -80,9 +83,6 @@ cdef class CallbackMemoryResource(DeviceMemoryResource): cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): pass -cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor): - pass - cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): cdef object _log_file_name cpdef get_file_name(self) diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx index 3e5aaec74..25a08ce2e 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/_lib/memory_resource.pyx @@ -392,6 +392,35 @@ cdef class SystemMemoryResource(DeviceMemoryResource): pass +cdef class SamHeadroomResourceAdaptor(DeviceMemoryResource): + def __cinit__( + self, + size_t headroom + ): + self.system_mr = SystemMemoryResource() + self.c_obj.reset( + new sam_headroom_resource_adaptor[system_memory_resource]( + self.system_mr.get_mr(), + headroom + ) + ) + + def __init__( + self, + size_t headroom + ): + """ + Memory resource that adapts system memory resource to allocate memory + with a headroom. + + Parameters + ---------- + headroom : size_t + Size of the reserved GPU memory as headroom + """ + pass + + cdef class PoolMemoryResource(UpstreamResourceAdaptor): def __cinit__( @@ -749,34 +778,6 @@ cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor): self.c_obj.get()) )[0].get_allocation_limit() -cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor): - def __cinit__( - self, - size_t headroom - ): - self.upstream_mr = SystemMemoryResource() - self.c_obj.reset( - new sam_headroom_resource_adaptor[system_memory_resource]( - self.upstream_mr.get_mr(), - headroom - ) - ) - - def __init__( - self, - size_t headroom - ): - """ - Memory resource that adapts system memory resource to allocate memory - with a headroom. 
- - Parameters - ---------- - headroom : size_t - Size of the reserved GPU memory as headroom - """ - pass - cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor): def __cinit__( @@ -1080,10 +1081,8 @@ cdef _per_device_mrs = defaultdict(CudaMemoryResource) cpdef void _initialize( bool pool_allocator=False, bool managed_memory=False, - bool system_memory=False, object initial_pool_size=None, object maximum_pool_size=None, - object system_memory_headroom_size=None, object devices=0, bool logging=False, object log_file_name=None, @@ -1091,16 +1090,23 @@ cpdef void _initialize( """ Initializes RMM library using the options passed """ - if managed_memory and system_memory: - raise ValueError("managed_memory and system_memory cannot both be True") - if managed_memory: upstream = ManagedMemoryResource - elif system_memory: - upstream = SystemMemoryResource else: upstream = CudaMemoryResource + if pool_allocator: + typ = PoolMemoryResource + args = (upstream(),) + kwargs = dict( + initial_pool_size=initial_pool_size, + maximum_pool_size=maximum_pool_size + ) + else: + typ = upstream + args = () + kwargs = {} + cdef DeviceMemoryResource mr cdef int original_device @@ -1126,27 +1132,13 @@ cpdef void _initialize( for device in devices: setDevice(device) - if system_memory and system_memory_headroom_size is not None: - base_mr = SamHeadroomResourceAdaptor( - system_memory_headroom_size - ) - else: - base_mr = upstream() - - if pool_allocator: - base_mr = PoolMemoryResource( - base_mr, - initial_pool_size=initial_pool_size, - maximum_pool_size=maximum_pool_size - ) - if logging: mr = LoggingResourceAdaptor( - base_mr, + typ(*args, **kwargs), log_file_name ) else: - mr = base_mr + mr = typ(*args, **kwargs) set_per_device_resource(device, mr) diff --git a/python/rmm/rmm/rmm.py b/python/rmm/rmm/rmm.py index 22d3e0873..0200b428e 100644 --- a/python/rmm/rmm/rmm.py +++ b/python/rmm/rmm/rmm.py @@ -27,10 +27,8 @@ def __init__(self, errcode, msg): def reinitialize( pool_allocator=False, managed_memory=False, - system_memory=False, initial_pool_size=None, maximum_pool_size=None, - system_memory_headroom_size=None, devices=0, logging=False, log_file_name=None, @@ -47,8 +45,6 @@ def reinitialize( performance. managed_memory : bool, default False If True, use managed memory for device memory allocation - system_memory : bool, default False - If True, use system allocated memory for device memory allocation initial_pool_size : int, default None When `pool_allocator` is True, this indicates the initial pool size in bytes. By default, 1/2 of the total GPU memory is used. @@ -57,12 +53,6 @@ def reinitialize( When `pool_allocator` is True, this indicates the maximum pool size in bytes. By default, the total available memory on the GPU is used. When `pool_allocator` is False, this argument is ignored if provided. - system_memory_headroom_size : int, default None - When `system_memory` is True, this indicates the headroom size in bytes - to be reserved for CUDA calls not using system memory. By default, the - headroom is 0 and the total available memory on the GPU can be used by - system memory. - When `system_allocator` is False, this argument is ignored if provided. devices : int or List[int], default 0 GPU device IDs to register. By default registers only GPU 0. logging : bool, default False @@ -85,21 +75,14 @@ def reinitialize( with device ID ``1``. Use `rmm.get_log_filenames()` to get the log file names corresponding to each device. 
""" - if managed_memory and system_memory: - raise ValueError( - "managed_memory and system_memory cannot both be True" - ) - for func, args, kwargs in reversed(_reinitialize_hooks): func(*args, **kwargs) mr._initialize( pool_allocator=pool_allocator, managed_memory=managed_memory, - system_memory=system_memory, initial_pool_size=initial_pool_size, maximum_pool_size=maximum_pool_size, - system_memory_headroom_size=system_memory_headroom_size, devices=devices, logging=logging, log_file_name=log_file_name, diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index 16a867325..d825cf684 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -104,13 +104,23 @@ def test_rmm_modes(dtype, nelem, alloc, managed, pool): @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) @pytest.mark.parametrize( - "system, pool", list(product([False, True], [False, True])) + "system, pool, headroom", list(product([False, True], [False, True], [False, True])) ) -def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool): +def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool, headroom): assert rmm.is_initialized() array_tester(dtype, nelem, alloc) - rmm.reinitialize(pool_allocator=pool, system_memory=system) + if system and headroom: + base_mr = rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20) + elif system: + base_mr = rmm.mr.SystemMemoryResource() + else: + base_mr = rmm.mr.CudaMemoryResource() + if pool: + mr = rmm.mr.PoolMemoryResource(base_mr) + else: + mr = base_mr + rmm.mr.set_current_device_resource(mr) assert rmm.is_initialized() @@ -438,7 +448,8 @@ def test_pool_memory_resource(dtype, nelem, alloc): lambda: rmm.mr.ManagedMemoryResource(), ] + ( - [lambda: rmm.mr.SystemMemoryResource()] + [lambda: rmm.mr.SystemMemoryResource(), + lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20)] if _SYSTEM_MEMORY_SUPPORTED else [] ), @@ -465,7 +476,8 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): ), ] + ( - [lambda: rmm.mr.SystemMemoryResource()] + [lambda: rmm.mr.SystemMemoryResource(), + lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20)] if _SYSTEM_MEMORY_SUPPORTED else [] ), @@ -512,14 +524,6 @@ def test_reinitialize_initial_pool_size_gt_max(): assert "Initial pool size exceeds the maximum pool size" in str(e.value) -def test_reinitialize_both_managed_and_system(): - with pytest.raises(ValueError) as e: - rmm.reinitialize(managed_memory=True, system_memory=True) - assert "managed_memory and system_memory cannot both be True" in str( - e.value - ) - - @pytest.mark.parametrize("dtype", _dtypes) @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) From 005e522f7e4b2c20e79a990b53ba0a32ccdce381 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 24 Jul 2024 08:08:27 -0700 Subject: [PATCH 08/11] fix pre commit errors --- python/rmm/rmm/rmm.py | 2 +- python/rmm/rmm/tests/test_rmm.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python/rmm/rmm/rmm.py b/python/rmm/rmm/rmm.py index 0200b428e..e5290905c 100644 --- a/python/rmm/rmm/rmm.py +++ b/python/rmm/rmm/rmm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index d825cf684..dc2030a79 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -104,7 +104,8 @@ def test_rmm_modes(dtype, nelem, alloc, managed, pool): @pytest.mark.parametrize("nelem", _nelems) @pytest.mark.parametrize("alloc", _allocs) @pytest.mark.parametrize( - "system, pool, headroom", list(product([False, True], [False, True], [False, True])) + "system, pool, headroom", + list(product([False, True], [False, True], [False, True])), ) def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool, headroom): assert rmm.is_initialized() @@ -448,8 +449,10 @@ def test_pool_memory_resource(dtype, nelem, alloc): lambda: rmm.mr.ManagedMemoryResource(), ] + ( - [lambda: rmm.mr.SystemMemoryResource(), - lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20)] + [ + lambda: rmm.mr.SystemMemoryResource(), + lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20), + ] if _SYSTEM_MEMORY_SUPPORTED else [] ), @@ -476,8 +479,10 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): ), ] + ( - [lambda: rmm.mr.SystemMemoryResource(), - lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20)] + [ + lambda: rmm.mr.SystemMemoryResource(), + lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20), + ] if _SYSTEM_MEMORY_SUPPORTED else [] ), From 6041192d3a3b6f21ad344f388e72eca0e9a1f9a1 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 24 Jul 2024 15:01:46 -0700 Subject: [PATCH 09/11] change sam headroom adaptor to mr --- ...r.hpp => sam_headroom_memory_resource.hpp} | 58 ++++++------------- python/rmm/rmm/_lib/memory_resource.pxd | 3 - python/rmm/rmm/_lib/memory_resource.pyx | 49 ++++++---------- python/rmm/rmm/mr.py | 2 - python/rmm/rmm/tests/test_rmm.py | 12 ++-- tests/mr/device/system_mr_tests.cu | 39 ++++--------- 6 files changed, 53 insertions(+), 110 deletions(-) rename include/rmm/mr/device/{sam_headroom_resource_adaptor.hpp => sam_headroom_memory_resource.hpp} (63%) diff --git a/include/rmm/mr/device/sam_headroom_resource_adaptor.hpp b/include/rmm/mr/device/sam_headroom_memory_resource.hpp similarity index 63% rename from include/rmm/mr/device/sam_headroom_resource_adaptor.hpp rename to include/rmm/mr/device/sam_headroom_memory_resource.hpp index fc913f290..418b9599d 100644 --- a/include/rmm/mr/device/sam_headroom_resource_adaptor.hpp +++ b/include/rmm/mr/device/sam_headroom_memory_resource.hpp @@ -23,12 +23,12 @@ namespace rmm::mr { /** - * @addtogroup device_resource_adaptors + * @addtogroup device_memory_resources * @{ * @file */ /** - * @brief Resource that adapts system memory resource to allocate memory with a headroom. + * @brief Resource that uses system memory resource to allocate memory with a headroom. * * System allocated memory (SAM) can be migrated to the GPU, but is never migrated back the host. If * GPU memory is over-subscribed, this can cause other CUDA calls to fail with out-of-memory errors. @@ -39,46 +39,22 @@ namespace rmm::mr { * Since doing this check on every allocation can be expensive, the caller may choose to use other * allocators (e.g. `binning_memory_resource`) for small allocations, and use this allocator for * large allocations only. - * - * @tparam Upstream Type of the upstream resource used for allocation/deallocation. Must be - * `system_memory_resource`. 
*/ -template -class sam_headroom_resource_adaptor final : public device_memory_resource { +class sam_headroom_memory_resource final : public device_memory_resource { public: /** - * @brief Construct a headroom adaptor using `upstream` to satisfy allocation requests. + * @brief Construct a headroom memory resource. * - * @param upstream The resource used for allocating/deallocating device memory. Must be - * `system_memory_resource`. * @param headroom Size of the reserved GPU memory as headroom */ - explicit sam_headroom_resource_adaptor(Upstream* upstream, std::size_t headroom) - : upstream_{upstream}, headroom_{headroom} - { - static_assert(std::is_same_v, - "Upstream must be rmm::mr::system_memory_resource"); - } + explicit sam_headroom_memory_resource(std::size_t headroom) : system_mr_{}, headroom_{headroom} {} - sam_headroom_resource_adaptor() = delete; - ~sam_headroom_resource_adaptor() override = default; - sam_headroom_resource_adaptor(sam_headroom_resource_adaptor const&) = delete; - sam_headroom_resource_adaptor(sam_headroom_resource_adaptor&&) = delete; - sam_headroom_resource_adaptor& operator=(sam_headroom_resource_adaptor const&) = delete; - sam_headroom_resource_adaptor& operator=(sam_headroom_resource_adaptor&&) = delete; - - /** - * @briefreturn{rmm::device_async_resource_ref to the upstream resource} - */ - [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept - { - return upstream_; - } - - /** - * @briefreturn{Upstream* to the upstream memory resource} - */ - [[nodiscard]] Upstream* get_upstream() const noexcept { return upstream_; } + sam_headroom_memory_resource() = delete; + ~sam_headroom_memory_resource() override = default; + sam_headroom_memory_resource(sam_headroom_memory_resource const&) = delete; + sam_headroom_memory_resource(sam_headroom_memory_resource&&) = delete; + sam_headroom_memory_resource& operator=(sam_headroom_memory_resource const&) = delete; + sam_headroom_memory_resource& operator=(sam_headroom_memory_resource&&) = delete; private: /** @@ -94,8 +70,7 @@ class sam_headroom_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override { - void* pointer = - get_upstream_resource().allocate_async(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream); + void* pointer = system_mr_.allocate_async(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream); auto const free = rmm::available_device_memory().first; auto const allocatable = free > headroom_ ? 
free - headroom_ : 0UL; @@ -131,7 +106,7 @@ class sam_headroom_resource_adaptor final : public device_memory_resource { [[maybe_unused]] std::size_t bytes, [[maybe_unused]] cuda_stream_view stream) override { - get_upstream_resource().deallocate_async(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT, stream); + system_mr_.deallocate_async(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT, stream); } /** @@ -144,12 +119,13 @@ class sam_headroom_resource_adaptor final : public device_memory_resource { [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override { if (this == &other) { return true; } - auto cast = dynamic_cast(&other); + auto cast = dynamic_cast(&other); if (cast == nullptr) { return false; } - return get_upstream_resource() == cast->get_upstream_resource() && headroom_ == cast->headroom_; + return headroom_ == cast->headroom_; } - Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests + system_memory_resource + system_mr_; ///< The system memory resource used for satisfying allocation requests std::size_t headroom_; ///< Size of GPU memory reserved as headroom }; /** @} */ // end of group diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 75754b85c..95f125e0f 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -55,9 +55,6 @@ cdef class ManagedMemoryResource(DeviceMemoryResource): cdef class SystemMemoryResource(DeviceMemoryResource): pass -cdef class SamHeadroomResourceAdaptor(DeviceMemoryResource): - cdef readonly DeviceMemoryResource system_mr - cdef class CudaAsyncMemoryResource(DeviceMemoryResource): pass diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx index 25a08ce2e..84e170beb 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/_lib/memory_resource.pyx @@ -99,6 +99,11 @@ cdef extern from "rmm/mr/device/system_memory_resource.hpp" \ cdef cppclass system_memory_resource(device_memory_resource): system_memory_resource() except + +cdef extern from "rmm/mr/device/sam_headroom_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass sam_headroom_memory_resource(device_memory_resource): + sam_headroom_memory_resource(size_t headroom) except + + cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ namespace "rmm::mr" nogil: @@ -175,13 +180,6 @@ cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \ size_t get_allocated_bytes() except + size_t get_allocation_limit() except + -cdef extern from "rmm/mr/device/sam_headroom_resource_adaptor.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass sam_headroom_resource_adaptor[Upstream](device_memory_resource): - sam_headroom_resource_adaptor( - Upstream* upstream_mr, - size_t headroom) except + - cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \ namespace "rmm::mr" nogil: cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource): @@ -379,39 +377,28 @@ cdef class ManagedMemoryResource(DeviceMemoryResource): cdef class SystemMemoryResource(DeviceMemoryResource): - def __cinit__(self): - self.c_obj.reset( - new system_memory_resource() - ) - - def __init__(self): - """ - Memory resource that uses ``malloc``/``free`` for - allocation/deallocation. 
- """ - pass - - -cdef class SamHeadroomResourceAdaptor(DeviceMemoryResource): def __cinit__( self, - size_t headroom + headroom=None ): - self.system_mr = SystemMemoryResource() - self.c_obj.reset( - new sam_headroom_resource_adaptor[system_memory_resource]( - self.system_mr.get_mr(), - headroom + if headroom is None: + self.c_obj.reset( + new system_memory_resource() + ) + else: + self.c_obj.reset( + new sam_headroom_memory_resource( + headroom + ) ) - ) def __init__( self, - size_t headroom + headroom=None ): """ - Memory resource that adapts system memory resource to allocate memory - with a headroom. + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. Parameters ---------- diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index 66e535335..c2e881836 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -24,7 +24,6 @@ ManagedMemoryResource, PoolMemoryResource, PrefetchResourceAdaptor, - SamHeadroomResourceAdaptor, StatisticsResourceAdaptor, SystemMemoryResource, TrackingResourceAdaptor, @@ -56,7 +55,6 @@ "ManagedMemoryResource", "PoolMemoryResource", "PrefetchResourceAdaptor", - "SamHeadroomResourceAdaptor", "StatisticsResourceAdaptor", "SystemMemoryResource", "TrackingResourceAdaptor", diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index dc2030a79..e28b3b8b4 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -111,10 +111,10 @@ def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool, headroom): assert rmm.is_initialized() array_tester(dtype, nelem, alloc) - if system and headroom: - base_mr = rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20) - elif system: - base_mr = rmm.mr.SystemMemoryResource() + if system: + base_mr = rmm.mr.SystemMemoryResource( + headroom=1 << 20 if headroom else None + ) else: base_mr = rmm.mr.CudaMemoryResource() if pool: @@ -451,7 +451,7 @@ def test_pool_memory_resource(dtype, nelem, alloc): + ( [ lambda: rmm.mr.SystemMemoryResource(), - lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20), + lambda: rmm.mr.SystemMemoryResource(headroom=1 << 20), ] if _SYSTEM_MEMORY_SUPPORTED else [] @@ -481,7 +481,7 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): + ( [ lambda: rmm.mr.SystemMemoryResource(), - lambda: rmm.mr.SamHeadroomResourceAdaptor(headroom=1 << 20), + lambda: rmm.mr.SystemMemoryResource(headroom=1 << 20), ] if _SYSTEM_MEMORY_SUPPORTED else [] diff --git a/tests/mr/device/system_mr_tests.cu b/tests/mr/device/system_mr_tests.cu index 079afeb14..00084c4e2 100644 --- a/tests/mr/device/system_mr_tests.cu +++ b/tests/mr/device/system_mr_tests.cu @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -54,9 +54,9 @@ void touch_on_gpu(void* ptr, std::size_t size) using system_mr = rmm::mr::system_memory_resource; static_assert(cuda::mr::resource_with); static_assert(cuda::mr::async_resource_with); -using headroom_adaptor = rmm::mr::sam_headroom_resource_adaptor; -static_assert(cuda::mr::resource_with); -static_assert(cuda::mr::async_resource_with); +using headroom_mr = rmm::mr::sam_headroom_memory_resource; +static_assert(cuda::mr::resource_with); +static_assert(cuda::mr::async_resource_with); class SystemMRTest : public ::testing::Test { protected: @@ -79,19 +79,6 @@ TEST(SystemMRSimpleTest, ThrowIfNotSupported) } } -TEST(SAMHeadroomAdaptorTest, ThrowIfNotSupported) -{ - auto construct_mr = []() { - system_mr mr; - headroom_adaptor adaptor{&mr, 0}; - }; - if 
(rmm::mr::detail::is_system_memory_supported(rmm::get_current_cuda_device())) { - EXPECT_NO_THROW(construct_mr()); - } else { - EXPECT_THROW(construct_mr(), rmm::logic_error); - } -} - TEST_F(SystemMRTest, FirstTouchOnCPU) { auto const free = rmm::available_device_memory().first; @@ -114,23 +101,21 @@ TEST_F(SystemMRTest, FirstTouchOnGPU) mr.deallocate(ptr, size_mb); } -TEST_F(SystemMRTest, AdaptorReserveAllFreeMemory) +TEST_F(SystemMRTest, HeadroomMRReserveAllFreeMemory) { auto const free = rmm::available_device_memory().first; - system_mr mr; // All the free GPU memory is set as headroom, so allocation is only on the CPU. - headroom_adaptor adaptor{&mr, free + size_gb}; - void* ptr = adaptor.allocate(size_mb); + headroom_mr mr{free + size_gb}; + void* ptr = mr.allocate(size_mb); touch_on_cpu(ptr, size_mb); - adaptor.deallocate(ptr, size_mb); + mr.deallocate(ptr, size_mb); } -TEST_F(SystemMRTest, AdaptorDifferentParametersUnequal) +TEST_F(SystemMRTest, HeadroomMRDifferentParametersUnequal) { - system_mr mr; - headroom_adaptor adaptor1{&mr, size_mb}; - headroom_adaptor adaptor2{&mr, size_gb}; - EXPECT_FALSE(adaptor1.is_equal(adaptor2)); + headroom_mr mr1{size_mb}; + headroom_mr mr2{size_gb}; + EXPECT_FALSE(mr1.is_equal(mr2)); } } // namespace } // namespace rmm::test From 2d017e34580a36e25759bb9850a589de55259cb1 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 24 Jul 2024 15:05:12 -0700 Subject: [PATCH 10/11] better formatting --- include/rmm/mr/device/sam_headroom_memory_resource.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/rmm/mr/device/sam_headroom_memory_resource.hpp b/include/rmm/mr/device/sam_headroom_memory_resource.hpp index 418b9599d..cdf14181f 100644 --- a/include/rmm/mr/device/sam_headroom_memory_resource.hpp +++ b/include/rmm/mr/device/sam_headroom_memory_resource.hpp @@ -124,9 +124,10 @@ class sam_headroom_memory_resource final : public device_memory_resource { return headroom_ == cast->headroom_; } - system_memory_resource - system_mr_; ///< The system memory resource used for satisfying allocation requests - std::size_t headroom_; ///< Size of GPU memory reserved as headroom + ///< The system memory resource used for satisfying allocation requests + system_memory_resource system_mr_; + ///< Size of GPU memory reserved as headroom + std::size_t headroom_; }; /** @} */ // end of group } // namespace rmm::mr From f8cbf80b587c4e8adc270eba260654af7dfc0b5c Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 24 Jul 2024 17:19:40 -0700 Subject: [PATCH 11/11] separate out sam headroom mr in python --- python/rmm/rmm/_lib/memory_resource.pxd | 3 +++ python/rmm/rmm/_lib/memory_resource.pyx | 29 +++++++++++++++---------- python/rmm/rmm/mr.py | 2 ++ python/rmm/rmm/tests/test_rmm.py | 11 +++++----- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/python/rmm/rmm/_lib/memory_resource.pxd b/python/rmm/rmm/_lib/memory_resource.pxd index 95f125e0f..000a3fe1e 100644 --- a/python/rmm/rmm/_lib/memory_resource.pxd +++ b/python/rmm/rmm/_lib/memory_resource.pxd @@ -55,6 +55,9 @@ cdef class ManagedMemoryResource(DeviceMemoryResource): cdef class SystemMemoryResource(DeviceMemoryResource): pass +cdef class SamHeadroomMemoryResource(DeviceMemoryResource): + pass + cdef class CudaAsyncMemoryResource(DeviceMemoryResource): pass diff --git a/python/rmm/rmm/_lib/memory_resource.pyx b/python/rmm/rmm/_lib/memory_resource.pyx index 84e170beb..5030c5d2d 100644 --- a/python/rmm/rmm/_lib/memory_resource.pyx +++ b/python/rmm/rmm/_lib/memory_resource.pyx @@ 
-377,24 +377,29 @@ cdef class ManagedMemoryResource(DeviceMemoryResource): cdef class SystemMemoryResource(DeviceMemoryResource): + def __cinit__(self): + self.c_obj.reset( + new system_memory_resource() + ) + + def __init__(self): + """ + Memory resource that uses ``malloc``/``free`` for + allocation/deallocation. + """ + pass + + +cdef class SamHeadroomMemoryResource(DeviceMemoryResource): def __cinit__( self, - headroom=None + size_t headroom ): - if headroom is None: - self.c_obj.reset( - new system_memory_resource() - ) - else: - self.c_obj.reset( - new sam_headroom_memory_resource( - headroom - ) - ) + self.c_obj.reset(new sam_headroom_memory_resource(headroom)) def __init__( self, - headroom=None + size_t headroom ): """ Memory resource that uses ``malloc``/``free`` for diff --git a/python/rmm/rmm/mr.py b/python/rmm/rmm/mr.py index c2e881836..6eb94da0f 100644 --- a/python/rmm/rmm/mr.py +++ b/python/rmm/rmm/mr.py @@ -24,6 +24,7 @@ ManagedMemoryResource, PoolMemoryResource, PrefetchResourceAdaptor, + SamHeadroomMemoryResource, StatisticsResourceAdaptor, SystemMemoryResource, TrackingResourceAdaptor, @@ -55,6 +56,7 @@ "ManagedMemoryResource", "PoolMemoryResource", "PrefetchResourceAdaptor", + "SamHeadroomMemoryResource", "StatisticsResourceAdaptor", "SystemMemoryResource", "TrackingResourceAdaptor", diff --git a/python/rmm/rmm/tests/test_rmm.py b/python/rmm/rmm/tests/test_rmm.py index e28b3b8b4..c4fd90c45 100644 --- a/python/rmm/rmm/tests/test_rmm.py +++ b/python/rmm/rmm/tests/test_rmm.py @@ -112,9 +112,10 @@ def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool, headroom): array_tester(dtype, nelem, alloc) if system: - base_mr = rmm.mr.SystemMemoryResource( - headroom=1 << 20 if headroom else None - ) + if headroom: + base_mr = rmm.mr.SamHeadroomMemoryResource(headroom=1 << 20) + else: + base_mr = rmm.mr.SystemMemoryResource() else: base_mr = rmm.mr.CudaMemoryResource() if pool: @@ -451,7 +452,7 @@ def test_pool_memory_resource(dtype, nelem, alloc): + ( [ lambda: rmm.mr.SystemMemoryResource(), - lambda: rmm.mr.SystemMemoryResource(headroom=1 << 20), + lambda: rmm.mr.SamHeadroomMemoryResource(headroom=1 << 20), ] if _SYSTEM_MEMORY_SUPPORTED else [] @@ -481,7 +482,7 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream): + ( [ lambda: rmm.mr.SystemMemoryResource(), - lambda: rmm.mr.SystemMemoryResource(headroom=1 << 20), + lambda: rmm.mr.SamHeadroomMemoryResource(headroom=1 << 20), ] if _SYSTEM_MEMORY_SUPPORTED else []
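
Taken together, the series ends with two standalone Python resources, `SystemMemoryResource` and `SamHeadroomMemoryResource`. A minimal sketch of the resulting usage, assuming a device with system-allocated memory support and illustrative headroom and buffer sizes:

    import rmm

    # Plain system (malloc/free) allocation, with no GPU memory reserved:
    plain_mr = rmm.mr.SystemMemoryResource()

    # Or keep 1 GiB of GPU memory free for CUDA calls that bypass system
    # memory, optionally fronted by a pool for cheaper small allocations:
    headroom_mr = rmm.mr.SamHeadroomMemoryResource(headroom=1 << 30)
    pool_mr = rmm.mr.PoolMemoryResource(headroom_mr)

    rmm.mr.set_current_device_resource(pool_mr)
    buf = rmm.DeviceBuffer(size=1 << 20)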