Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Add CUDA Managed Memory (Unified Memory) Allocation Mode #2

Merged
merged 13 commits into from
Jan 10, 2019
Merged
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__
.cache
.coverage
.vscode
*.code-workspace
*.swp
*.pytest_cache
DartConfiguration.tcl
Expand Down
20 changes: 12 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,27 @@

## New Features

- PR #1: Spun off RMM from cuDF into its own repository.

- PR #2 Added CUDA Managed Memory allocation mode

## Improvements

- CUDF PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy
- CUDF PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**`

RMM was initially implemented as part of cuDF, so we include the relevant changelog history below.
## Bug Fixes

# cuDF 0.4.0 (05 Dec 2018)
# RMM 0.4.0 (20 Dec 2018)

## New Features

- PR #1 Spun off RMM from cuDF into its own repository.

## Improvements

- CUDF PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy
- CUDF PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**`

## Bug Fixes



RMM was initially implemented as part of cuDF, so we include the relevant changelog history below.

# cuDF 0.3.0 (23 Nov 2018)

Expand Down
24 changes: 15 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,21 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR)

project(RMM VERSION 0.4.0 LANGUAGES C CXX CUDA)

###################################################################################################
# - build type ------------------------------------------------------------------------------------

# Set a default build type if none was specified
set(DEFAULT_BUILD_TYPE "Release")

if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.")
set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE
STRING "Choose the type of build." FORCE)
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
"Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

###################################################################################################
# - compiler options ------------------------------------------------------------------------------

Expand All @@ -32,9 +47,6 @@ endif(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")

# set default build type
set(CMAKE_BUILD_TYPE "Release")

option(BUILD_TESTS "Configure CMake to build tests"
ON)

Expand Down Expand Up @@ -128,9 +140,3 @@ add_custom_command(OUTPUT INSTALL_PYTHON_CFFI
VERBATIM)

add_custom_target(install_python DEPENDS rmm PYTHON_CFFI INSTALL_PYTHON_CFFI)






15 changes: 14 additions & 1 deletion include/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,25 @@ typedef enum
N_RMM_ERROR //< Count of error types
} rmmError_t;

/** ---------------------------------------------------------------------------*
* @brief RMM allocation mode settings
*
* These settings can be ORed together. For example to use a pool of managed
* memory, use `mode = PoolAllocation | CudaManagedMemory`.
* --------------------------------------------------------------------------**/
typedef enum
{
CudaDefaultAllocation = 0, //< Use cudaMalloc for allocation
PoolAllocation, //< Use pool suballocation strategy
PoolAllocation = 1, //< Use pool suballocation strategy
CudaManagedMemory = 2, //< Use cudaMallocManaged rather than cudaMalloc
} rmmAllocationMode_t;

/** ---------------------------------------------------------------------------*
* @brief Options for initializing the memory manager
*
* If set to zero, `initial_pool_size` defaults to half of the total GPU memory
* for the current device.
* --------------------------------------------------------------------------**/
typedef struct
{
rmmAllocationMode_t allocation_mode; //< Allocation strategy to use
Expand Down
23 changes: 15 additions & 8 deletions include/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,6 @@ class LogIt {
bool usageLogging;
};

inline bool usePoolAllocator() {
return Manager::getOptions().allocation_mode == PoolAllocation;
}

/** ---------------------------------------------------------------------------*
* @brief Allocate memory and return a pointer to device memory.
*
Expand All @@ -126,13 +122,18 @@ inline rmmError_t alloc(T** ptr, size_t size, cudaStream_t stream, const char* f

if (!ptr && !size) {
return RMM_SUCCESS;
} else if( !size ) {
ptr[0] = nullptr;
return RMM_SUCCESS;
}

if (!ptr) return RMM_ERROR_INVALID_ARGUMENT;

if (rmm::usePoolAllocator()) {
if (rmm::Manager::usePoolAllocator()) {
RMM_CHECK(rmm::Manager::getInstance().registerStream(stream));
RMM_CHECK_CNMEM(cnmemMalloc(reinterpret_cast<void**>(ptr), size, stream));
} else if (rmm::Manager::useManagedMemory()) {
RMM_CHECK_CUDA(cudaMallocManaged(reinterpret_cast<void**>(ptr), size));
} else
RMM_CHECK_CUDA(cudaMalloc(reinterpret_cast<void**>(ptr), size));

Expand Down Expand Up @@ -167,15 +168,21 @@ inline rmmError_t realloc(T** ptr, size_t new_size, cudaStream_t stream,

if (!ptr) return RMM_ERROR_INVALID_ARGUMENT;

if (rmm::usePoolAllocator()) {
if (rmm::Manager::usePoolAllocator()) {
RMM_CHECK(rmm::Manager::getInstance().registerStream(stream));
RMM_CHECK_CNMEM(cnmemFree(*reinterpret_cast<void**>(ptr), stream));
RMM_CHECK_CNMEM(
cnmemMalloc(reinterpret_cast<void**>(ptr), new_size, stream));
} else {
RMM_CHECK_CUDA(cudaFree(*ptr));
RMM_CHECK_CUDA(cudaMalloc(reinterpret_cast<void**>(ptr), new_size));
if (!new_size)
ptr[0] = nullptr;
else if (rmm::Manager::useManagedMemory())
RMM_CHECK_CUDA(cudaMallocManaged(reinterpret_cast<void**>(ptr), new_size));
else
RMM_CHECK_CUDA(cudaMalloc(reinterpret_cast<void**>(ptr), new_size));
}

log.setPointer(*ptr);
return RMM_SUCCESS;
}
Expand All @@ -198,7 +205,7 @@ inline rmmError_t realloc(T** ptr, size_t new_size, cudaStream_t stream,
inline rmmError_t free(void* ptr, cudaStream_t stream, const char* file,
unsigned int line) {
rmm::LogIt log(rmm::Logger::Free, ptr, 0, stream, file, line);
if (rmm::usePoolAllocator())
if (rmm::Manager::usePoolAllocator())
RMM_CHECK_CNMEM(cnmemFree(ptr, stream));
else
RMM_CHECK_CUDA(cudaFree(ptr));
Expand Down
7 changes: 7 additions & 0 deletions python/librmm_cffi/librmm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
# False means to use default cudaMalloc
use_pool_allocator = False

# Whether to use managed memory for base allocation
# True means to use cudaMallocManaged
# False means to use cudaMalloc
# Can be combined with `use_pool_allocator` to
# create a managed memory pool allocator
use_managed_memory = False

# When `use_pool_allocator` is true, this indicates the initial pool size.
# Zero is used to indicate the default size, which currently is 1/2 total GPU
# memory.
Expand Down
8 changes: 7 additions & 1 deletion python/librmm_cffi/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,14 @@ def initialize(self):
"""Initializes the RMM library using the options set in the
librmm_config module
"""
allocation_mode = 0
if rmm_cfg.use_pool_allocator:
allocation_mode = allocation_mode | self.PoolAllocation
if rmm_cfg.use_managed_memory:
allocation_mode = allocation_mode | self.CudaManagedMemory

opts = self._ffi.new("rmmOptions_t *",
[rmm_cfg.use_pool_allocator,
[allocation_mode,
rmm_cfg.initial_pool_size,
rmm_cfg.enable_logging])
return self.rmmInitialize(opts)
Expand Down
28 changes: 22 additions & 6 deletions python/tests/test_rmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,9 @@
import numpy as np

from librmm_cffi import librmm as rmm
from librmm_cffi import librmm_config as rmm_cfg

_dtypes = [np.int32]
_nelems = [1, 2, 7, 8, 9, 32, 128]


@pytest.mark.parametrize('dtype,nelem', list(product(_dtypes, _nelems)))
def test_rmm_alloc(dtype, nelem):
def array_tester(dtype, nelem):
# data
h_in = np.full(nelem, 3.2, dtype)
h_result = np.empty(nelem, dtype)
Expand All @@ -29,6 +25,26 @@ def test_rmm_alloc(dtype, nelem):
np.testing.assert_array_equal(h_result, h_in)


_dtypes = [np.int32]
_nelems = [1, 2, 7, 8, 9, 32, 128]


@pytest.mark.parametrize('dtype,nelem', list(product(_dtypes, _nelems)))
def test_rmm_alloc(dtype, nelem):
    # Round-trip an array through RMM device memory for every
    # dtype/size combination, delegating to the shared array_tester helper.
    array_tester(dtype, nelem)

# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.parametrize('managed, pool',
                         list(product([False, True], [False, True])))
def test_rmm_modes(managed, pool):
    """Reinitialize RMM in the requested allocation mode and verify that a
    device allocation round-trips data correctly.

    Restores the default (non-managed, non-pooled) configuration afterwards
    so later tests do not inherit the mode set here.
    """
    rmm.finalize()
    rmm_cfg.use_managed_memory = managed
    rmm_cfg.use_pool_allocator = pool
    rmm.initialize()

    try:
        array_tester(np.int32, 128)
    finally:
        # Reset global config: without this, the last parametrization
        # (managed=True, pool=True) would leak into subsequent tests.
        rmm.finalize()
        rmm_cfg.use_managed_memory = False
        rmm_cfg.use_pool_allocator = False
        rmm.initialize()


def test_rmm_csv_log():
dtype = np.int32
nelem = 1024
Expand Down
9 changes: 5 additions & 4 deletions src/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ rmmError_t rmmInitialize(rmmOptions_t *options)
rmm::Manager::setOptions(*options);
}

if (rmm::usePoolAllocator())
if (rmm::Manager::usePoolAllocator())
{
cnmemDevice_t dev;
RMM_CHECK_CUDA( cudaGetDevice(&(dev.device)) );
Expand All @@ -68,15 +68,16 @@ rmmError_t rmmInitialize(rmmOptions_t *options)
cudaStream_t streams[1]; streams[0] = 0;
dev.streams = streams;
dev.streamSizes = 0;
RMM_CHECK_CNMEM( cnmemInit(1, &dev, 0) );
unsigned flags = rmm::Manager::useManagedMemory() ? CNMEM_FLAGS_MANAGED : 0;
RMM_CHECK_CNMEM( cnmemInit(1, &dev, flags) );
}
return RMM_SUCCESS;
}

// Shutdown memory manager.
rmmError_t rmmFinalize()
{
if (rmm::usePoolAllocator())
if (rmm::Manager::usePoolAllocator())
RMM_CHECK_CNMEM( cnmemFinalize() );

rmm::Manager::getInstance().finalize();
Expand Down Expand Up @@ -121,7 +122,7 @@ rmmError_t rmmGetAllocationOffset(ptrdiff_t *offset,
// with the stream.
rmmError_t rmmGetInfo(size_t *freeSize, size_t *totalSize, cudaStream_t stream)
{
if (rmm::usePoolAllocator())
if (rmm::Manager::usePoolAllocator())
{
RMM_CHECK( rmm::Manager::getInstance().registerStream(stream) );
RMM_CHECK_CNMEM( cnmemMemGetInfo(freeSize, totalSize, stream) );
Expand Down
34 changes: 32 additions & 2 deletions src/memory_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,36 @@ namespace rmm
* --------------------------------------------------------------------------**/
static rmmOptions_t getOptions() { return getInstance().options; }

/** ---------------------------------------------------------------------------*
 * @brief Query whether the pool suballocation strategy is active.
 *
 * @return true if the PoolAllocation bit is set in the configured
 *         allocation mode
 * @return false otherwise
 * --------------------------------------------------------------------------**/
static inline bool usePoolAllocator() {
  return (getOptions().allocation_mode & PoolAllocation) != 0;
}

/** ---------------------------------------------------------------------------*
 * @brief Query whether CUDA Managed Memory (unified memory) allocation is
 * active.
 *
 * @return true if the CudaManagedMemory bit is set in the configured
 *         allocation mode
 * @return false otherwise
 * --------------------------------------------------------------------------**/
static inline bool useManagedMemory() {
  return (getOptions().allocation_mode & CudaManagedMemory) != 0;
}

/** ---------------------------------------------------------------------------*
 * @brief Returns true when CUDA default allocation is enabled
 *
 * True only when the allocation mode is exactly CudaDefaultAllocation (0),
 * i.e. neither the pool nor the managed-memory bit is set.
 *
 * @return true if CUDA default allocation is enabled
 * @return false if CUDA default allocation is disabled
 * --------------------------------------------------------------------------**/
// NOTE: made `static` for consistency with usePoolAllocator() /
// useManagedMemory(), so it can be called as rmm::Manager::useCudaDefaultAllocator().
static inline bool useCudaDefaultAllocator() {
  return CudaDefaultAllocation == getOptions().allocation_mode;
}

/** ---------------------------------------------------------------------------*
* @brief Shut down the Manager (clears the context)
*
Expand All @@ -187,7 +217,7 @@ namespace rmm
std::lock_guard<std::mutex> guard(streams_mutex);
if (registered_streams.empty() || 0 == registered_streams.count(stream)) {
registered_streams.insert(stream);
if (stream && PoolAllocation == options.allocation_mode) // don't register the null stream with CNMem
if (stream && usePoolAllocator()) // don't register the null stream with CNMem
RMM_CHECK_CNMEM( cnmemRegisterStream(stream) );
}
return RMM_SUCCESS;
Expand All @@ -207,4 +237,4 @@ namespace rmm
};
}

#endif // MEMORY_MANAGER_H
#endif // MEMORY_MANAGER_H
8 changes: 0 additions & 8 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,3 @@ ConfigureTest(RMM_TEST "${RMM_TEST_SRC}")
###################################################################################################

enable_testing()








Loading