Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Add CUDA Managed Memory (Unified Memory) Allocation Mode #2

Merged
merged 13 commits into from
Jan 10, 2019
Merged
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__
.cache
.coverage
.vscode
*.code-workspace
*.swp
*.pytest_cache
DartConfiguration.tcl
Expand Down
20 changes: 12 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,27 @@

## New Features

- PR #1: Spun off RMM from cuDF into its own repository.

- PR #2 Added CUDA Managed Memory allocation mode

## Improvements

- CUDF PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy
- CUDF PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**`

RMM was initially implemented as part of cuDF, so we include the relevant changelog history below.
## Bug Fixes

# cuDF 0.4.0 (05 Dec 2018)
# RMM 0.4.0 (20 Dec 2018)

## New Features

- PR #1 Spun off RMM from cuDF into its own repository.

## Improvements

- CUDF PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy
- CUDF PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**`

## Bug Fixes



RMM was initially implemented as part of cuDF, so we include the relevant changelog history below.

# cuDF 0.3.0 (23 Nov 2018)

Expand Down
24 changes: 15 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,21 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR)

project(RMM VERSION 0.4.0 LANGUAGES C CXX CUDA)

###################################################################################################
# - build type ------------------------------------------------------------------------------------

# Set a default build type if none was specified
set(DEFAULT_BUILD_TYPE "Release")

if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.")
set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE
STRING "Choose the type of build." FORCE)
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
"Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

###################################################################################################
# - compiler options ------------------------------------------------------------------------------

Expand All @@ -32,9 +47,6 @@ endif(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")

# set default build type
set(CMAKE_BUILD_TYPE "Release")

option(BUILD_TESTS "Configure CMake to build tests"
ON)

Expand Down Expand Up @@ -128,9 +140,3 @@ add_custom_command(OUTPUT INSTALL_PYTHON_CFFI
VERBATIM)

add_custom_target(install_python DEPENDS rmm PYTHON_CFFI INSTALL_PYTHON_CFFI)






15 changes: 14 additions & 1 deletion include/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,25 @@ typedef enum
N_RMM_ERROR //< Count of error types
} rmmError_t;

/** ---------------------------------------------------------------------------*
* @brief RMM allocation mode settings
*
* These settings can be ORed together. For example to use a pool of managed
* memory, use `mode = PoolAllocation | CudaManagedMemory`.
* --------------------------------------------------------------------------**/
typedef enum
{
CudaDefaultAllocation = 0, //< Use cudaMalloc for allocation
PoolAllocation, //< Use pool suballocation strategy
PoolAllocation = 1, //< Use pool suballocation strategy
CudaManagedMemory = 2, //< Use cudaMallocManaged rather than cudaMalloc
} rmmAllocationMode_t;

/** ---------------------------------------------------------------------------*
* @brief Options for initializing the memory manager
*
* If set to zero, `initial_pool_size` defaults to half of the total GPU memory
* for the current device.
* --------------------------------------------------------------------------**/
typedef struct
{
rmmAllocationMode_t allocation_mode; //< Allocation strategy to use
Expand Down
23 changes: 15 additions & 8 deletions include/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,6 @@ class LogIt {
bool usageLogging;
};

inline bool usePoolAllocator() {
return Manager::getOptions().allocation_mode == PoolAllocation;
}

/** ---------------------------------------------------------------------------*
* @brief Allocate memory and return a pointer to device memory.
*
Expand All @@ -126,13 +122,18 @@ inline rmmError_t alloc(T** ptr, size_t size, cudaStream_t stream, const char* f

if (!ptr && !size) {
return RMM_SUCCESS;
} else if( !size ) {
ptr[0] = nullptr;
return RMM_SUCCESS;
}

if (!ptr) return RMM_ERROR_INVALID_ARGUMENT;

if (rmm::usePoolAllocator()) {
if (rmm::Manager::usePoolAllocator()) {
RMM_CHECK(rmm::Manager::getInstance().registerStream(stream));
RMM_CHECK_CNMEM(cnmemMalloc(reinterpret_cast<void**>(ptr), size, stream));
} else if (rmm::Manager::useManagedMemory()) {
RMM_CHECK_CUDA(cudaMallocManaged(reinterpret_cast<void**>(ptr), size));
} else
RMM_CHECK_CUDA(cudaMalloc(reinterpret_cast<void**>(ptr), size));

Expand Down Expand Up @@ -167,15 +168,21 @@ inline rmmError_t realloc(T** ptr, size_t new_size, cudaStream_t stream,

if (!ptr) return RMM_ERROR_INVALID_ARGUMENT;

if (rmm::usePoolAllocator()) {
if (rmm::Manager::usePoolAllocator()) {
RMM_CHECK(rmm::Manager::getInstance().registerStream(stream));
RMM_CHECK_CNMEM(cnmemFree(*reinterpret_cast<void**>(ptr), stream));
RMM_CHECK_CNMEM(
cnmemMalloc(reinterpret_cast<void**>(ptr), new_size, stream));
} else {
RMM_CHECK_CUDA(cudaFree(*ptr));
RMM_CHECK_CUDA(cudaMalloc(reinterpret_cast<void**>(ptr), new_size));
if (!new_size)
ptr[0] = nullptr;
else if (rmm::Manager::useManagedMemory())
RMM_CHECK_CUDA(cudaMallocManaged(reinterpret_cast<void**>(ptr), new_size));
else
RMM_CHECK_CUDA(cudaMalloc(reinterpret_cast<void**>(ptr), new_size));
}

log.setPointer(*ptr);
return RMM_SUCCESS;
}
Expand All @@ -198,7 +205,7 @@ inline rmmError_t realloc(T** ptr, size_t new_size, cudaStream_t stream,
inline rmmError_t free(void* ptr, cudaStream_t stream, const char* file,
unsigned int line) {
rmm::LogIt log(rmm::Logger::Free, ptr, 0, stream, file, line);
if (rmm::usePoolAllocator())
if (rmm::Manager::usePoolAllocator())
RMM_CHECK_CNMEM(cnmemFree(ptr, stream));
else
RMM_CHECK_CUDA(cudaFree(ptr));
Expand Down
7 changes: 7 additions & 0 deletions python/librmm_cffi/librmm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
# False means to use default cudaMalloc
use_pool_allocator = False

# Whether to use managed memory for base allocation
# True means to use cudaMallocManaged
# False means to use cudaMalloc
# Can be combined with `use_pool_allocator` to
# create a managed memory pool allocator
use_managed_memory = False

# When `use_pool_allocator` is true, this indicates the initial pool size.
# Zero is used to indicate the default size, which currently is 1/2 total GPU
# memory.
Expand Down
8 changes: 7 additions & 1 deletion python/librmm_cffi/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,14 @@ def initialize(self):
"""Initializes the RMM library using the options set in the
librmm_config module
"""
allocation_mode = 0
if rmm_cfg.use_pool_allocator:
allocation_mode = allocation_mode | self.PoolAllocation
if rmm_cfg.use_managed_memory:
allocation_mode = allocation_mode | self.CudaManagedMemory

opts = self._ffi.new("rmmOptions_t *",
[rmm_cfg.use_pool_allocator,
[allocation_mode,
rmm_cfg.initial_pool_size,
rmm_cfg.enable_logging])
return self.rmmInitialize(opts)
Expand Down
28 changes: 22 additions & 6 deletions python/tests/test_rmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,9 @@
import numpy as np

from librmm_cffi import librmm as rmm
from librmm_cffi import librmm_config as rmm_cfg

_dtypes = [np.int32]
_nelems = [1, 2, 7, 8, 9, 32, 128]


@pytest.mark.parametrize('dtype,nelem', list(product(_dtypes, _nelems)))
def test_rmm_alloc(dtype, nelem):
def array_tester(dtype, nelem):
# data
h_in = np.full(nelem, 3.2, dtype)
h_result = np.empty(nelem, dtype)
Expand All @@ -29,6 +25,26 @@ def test_rmm_alloc(dtype, nelem):
np.testing.assert_array_equal(h_result, h_in)


_dtypes = [np.int32]
_nelems = [1, 2, 7, 8, 9, 32, 128]


@pytest.mark.parametrize('dtype,nelem', list(product(_dtypes, _nelems)))
def test_rmm_alloc(dtype, nelem):
    # Round-trip an array through RMM device memory for every
    # dtype/size combination, delegating to the shared array_tester helper.
    array_tester(dtype, nelem)

# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.parametrize('managed, pool',
                         list(product([False, True], [False, True])))
def test_rmm_modes(managed, pool):
    """Reinitialize RMM in the requested allocation mode and verify that a
    device allocation round-trips data correctly.

    Restores the default (non-managed, non-pooled) configuration afterwards
    so later tests do not inherit the mode set here.
    """
    rmm.finalize()
    rmm_cfg.use_managed_memory = managed
    rmm_cfg.use_pool_allocator = pool
    rmm.initialize()

    try:
        array_tester(np.int32, 128)
    finally:
        # Reset global config: without this, the last parametrization
        # (managed=True, pool=True) would leak into subsequent tests.
        rmm.finalize()
        rmm_cfg.use_managed_memory = False
        rmm_cfg.use_pool_allocator = False
        rmm.initialize()


def test_rmm_csv_log():
dtype = np.int32
nelem = 1024
Expand Down
9 changes: 5 additions & 4 deletions src/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ rmmError_t rmmInitialize(rmmOptions_t *options)
rmm::Manager::setOptions(*options);
}

if (rmm::usePoolAllocator())
if (rmm::Manager::usePoolAllocator())
{
cnmemDevice_t dev;
RMM_CHECK_CUDA( cudaGetDevice(&(dev.device)) );
Expand All @@ -68,15 +68,16 @@ rmmError_t rmmInitialize(rmmOptions_t *options)
cudaStream_t streams[1]; streams[0] = 0;
dev.streams = streams;
dev.streamSizes = 0;
RMM_CHECK_CNMEM( cnmemInit(1, &dev, 0) );
unsigned flags = rmm::Manager::useManagedMemory() ? CNMEM_FLAGS_MANAGED : 0;
RMM_CHECK_CNMEM( cnmemInit(1, &dev, flags) );
}
return RMM_SUCCESS;
}

// Shutdown memory manager.
rmmError_t rmmFinalize()
{
if (rmm::usePoolAllocator())
if (rmm::Manager::usePoolAllocator())
RMM_CHECK_CNMEM( cnmemFinalize() );

rmm::Manager::getInstance().finalize();
Expand Down Expand Up @@ -121,7 +122,7 @@ rmmError_t rmmGetAllocationOffset(ptrdiff_t *offset,
// with the stream.
rmmError_t rmmGetInfo(size_t *freeSize, size_t *totalSize, cudaStream_t stream)
{
if (rmm::usePoolAllocator())
if (rmm::Manager::usePoolAllocator())
{
RMM_CHECK( rmm::Manager::getInstance().registerStream(stream) );
RMM_CHECK_CNMEM( cnmemMemGetInfo(freeSize, totalSize, stream) );
Expand Down
34 changes: 32 additions & 2 deletions src/memory_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,36 @@ namespace rmm
* --------------------------------------------------------------------------**/
static rmmOptions_t getOptions() { return getInstance().options; }

/** ---------------------------------------------------------------------------*
 * @brief Query whether the pool suballocation strategy is active.
 *
 * @return true if the PoolAllocation bit is set in the configured
 *         allocation mode
 * @return false otherwise
 * --------------------------------------------------------------------------**/
static inline bool usePoolAllocator() {
  return (getOptions().allocation_mode & PoolAllocation) != 0;
}

/** ---------------------------------------------------------------------------*
 * @brief Query whether CUDA Managed Memory (unified memory) allocation is
 * active.
 *
 * @return true if the CudaManagedMemory bit is set in the configured
 *         allocation mode
 * @return false otherwise
 * --------------------------------------------------------------------------**/
static inline bool useManagedMemory() {
  return (getOptions().allocation_mode & CudaManagedMemory) != 0;
}

/** ---------------------------------------------------------------------------*
 * @brief Returns true when CUDA default allocation is enabled
 *
 * True only when the allocation mode is exactly CudaDefaultAllocation (0),
 * i.e. neither the pool nor the managed-memory bit is set.
 *
 * @return true if CUDA default allocation is enabled
 * @return false if CUDA default allocation is disabled
 * --------------------------------------------------------------------------**/
// NOTE: made `static` for consistency with usePoolAllocator() /
// useManagedMemory(), so it can be called as rmm::Manager::useCudaDefaultAllocator().
static inline bool useCudaDefaultAllocator() {
  return CudaDefaultAllocation == getOptions().allocation_mode;
}

/** ---------------------------------------------------------------------------*
* @brief Shut down the Manager (clears the context)
*
Expand All @@ -187,7 +217,7 @@ namespace rmm
std::lock_guard<std::mutex> guard(streams_mutex);
if (registered_streams.empty() || 0 == registered_streams.count(stream)) {
registered_streams.insert(stream);
if (stream && PoolAllocation == options.allocation_mode) // don't register the null stream with CNMem
if (stream && usePoolAllocator()) // don't register the null stream with CNMem
RMM_CHECK_CNMEM( cnmemRegisterStream(stream) );
}
return RMM_SUCCESS;
Expand All @@ -207,4 +237,4 @@ namespace rmm
};
}

#endif // MEMORY_MANAGER_H
#endif // MEMORY_MANAGER_H
8 changes: 0 additions & 8 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,3 @@ ConfigureTest(RMM_TEST "${RMM_TEST_SRC}")
###################################################################################################

enable_testing()








Loading